1 Brief explanation

Every boxplot means a monitoring point (Ponto de monitoramento (or PM) in portuguese). My goal here is to analyze the evolution between decades of each water quality parameter that compounds the Water Quality Index (WQI).

The river flows in the east-west direction as shown in the image below.

The logic behind the sorting in the boxplots is because of 2 main reasons:

  1. The original monitoring point isn’t easy to understand (8 digits, like 87409900)
  2. Changing the original nomenclature to PM1, PM2 (…) makes it easier to understand that the last point has water contributions of every other point upstream.

Some features that I want to add: * If the parameter is x, then use x’s classes (with its own classes background color plotted) * Define the timescale, should act just like a filter

# plan_wide_19902020 %>%
#   filter(ANO_COLETA > "1990" &
#          ANO_COLETA <= "2000")

2 Anotações de coisas por fazer:

  • Descobrir como colocar as estações no sentido correto montante -> jusante nos sumários

87398500, 87398980, 87398900, 87398950, 87405500, 87406900, 87409900

  • Aprender a segmentar o meu dataset por períodos
  • aprender a criar uma nova coluna com a segmentação dos períodos
  • maybe use ~facet.grid
  • aprender a colocar a legenda dentro do gráfico
    • reduzir o tamanho da legenda
  • corrigir os valores 0 de IQA pra NA
  • descobrir como conseguir a equação do lm
  • aprender a pivotar o sumário -> meu sumário do google docs ta batendo direitinho com o do R
  • descobrir se há outros TCCs com disponibilização de códigos
  • Namon tá com com casa decimal "," e ptot tá com "."
  • correlação forte entre condutividade e Namon/Ptot/DBO
1990-2000 2000-2010 2010-2020
1990-2000 2000-2010 2010-2020

3 Instalar os pacotes

# install.packages(tidyverse)

3.1 acessar os pacotes

# library(readr)
# library(rmarkdown)
# # library(qboxplot)
# library(readxl)
# library(pillar)
# library(dplyr)
# library(tidyverse)
# library(gapminder)
# library(knitr)
# library(kableExtra)
# library(ggpubr)
# library(gridExtra)
# library(modelsummary)
# library(gtsummary)
# library(GGally)
pacman::p_load(readr, rmarkdown, readxl,
               pillar, dplyr, tidyverse,
               gapminder, knitr, kableExtra,
               gridExtra, #modelsummary, 
               gtsummary, ggplot2,
               ggbeeswarm, GGally)
# pacman::p_load(tibbletime)
knitr::knit_hooks$set(time_it = local({
   now <- NULL
   function(before, options) {
      if (before) {
         # record the current time before each chunk
         now <<- Sys.time()
      } else {
         # calculate the time difference after a chunk
         res <- difftime(Sys.time(), now)
         # return a character string to show the time
         paste("Time for this code chunk to run:", res)
      }
   }
}))

knitr::opts_chunk$set(time_it = TRUE)

3.2 importando a planilha

Time for this code chunk to run: 1.23481702804565

Time for this code chunk to run: 0.627563953399658

4 data wrangling

# Como há dados faltantes, no cálculo entre o produto das colunas, ele acaba interpretando como se fosse zero, mas na verdade é NA
plan_wide_19902020 <- plan_wide_19902020 %>% 
   mutate(IQA = ifelse(IQA == 0, NA, IQA))

Time for this code chunk to run: 0.0324699878692627

Time for this code chunk to run: 0.00612711906433105

Time for this code chunk to run: 0.0100100040435791

5 setting theme

theme_grafs <- function(bg = "white", 
                        coloracao_letra = "black") {
   theme(
      plot.title = element_text(
         hjust = 0.5,
         color = coloracao_letra,
         size = 19),
      
      axis.title.x = 
         # element_text(
         # color = coloracao_letra,
         # size = 15,
         # angle = 0,),
         element_blank(),
      axis.title.y = element_text(
         color = coloracao_letra,
         size = 15,
         angle = 90),
      
      axis.text.x = element_text(
         color = coloracao_letra,
         size = 17),
      axis.text.y = element_text(
         color = coloracao_letra,
         size = 17,
         angle = 0),
      
      panel.background = element_rect(fill = bg),
      plot.background = element_rect(fill = bg),
      plot.margin = margin(l = 5, r = 10,
                           b = 5, t = 5)
   )
}

Time for this code chunk to run: 0.0103950500488281

6 setting different timescales

Time for this code chunk to run: 0.00690507888793945

7 setting sumaries

Time for this code chunk to run: 0.0142819881439209

8 Parâmetros físico-químicos

8.0.1 Oxigênio Dissolvido

# par_od <- plan_wide_19902020 %>%
#   select(CODIGO, `Oxigênio dissolvido`) %>%
#   group_nest(CODIGO)

# data %>%  o que o Pat fez no CC156 21min56s
#   highlight_key(., ~) %>% 
#   ggplot()
# oxig_p1 <- p1 %>% 
#   select(CODIGO, `Oxigênio dissolvido`)
# 
# par_od <- plan_wide_19902020 %>% 
#   select(CODIGO, ) %>% 
#   group_by(CODIGO)

# parametros_IQA

# parametros <- colnames(parametros_IQA)

# base_od <- function(titulo = "Título") {
#   annotate("rect",
#            xmin = -Inf, xmax = Inf,
#            ymin = -Inf, ymax = 2,
#            alpha = 1,
#            fill = "#ac5079")+ # >pior classe
#     annotate("rect",
#              xmin = -Inf, xmax = Inf,
#              ymin = 2, ymax = 4,
#              alpha = 1,
#              fill = "#eb5661")+ #classe 4
#     annotate("rect",
#              xmin = -Inf, xmax = Inf,
#              ymin = 4, ymax = 5,
#              alpha=1,
#              fill="#fcf7ab")+ #classe 3
#     annotate("rect",
#              xmin=-Inf,
#              xmax=Inf,
#              ymin=5,
#              ymax=6,
#              alpha=1,
#              fill="#70c18c")+ #classe 2
#     annotate("rect",
#              xmin=-Inf,
#              xmax=Inf,
#              ymin=6,
#              ymax=Inf,
#              alpha=1,
#              fill="#8dcdeb")+ #classe 1
#     stat_boxplot(
#       geom = 'errorbar',
#       width=0.3,
#       position = position_dodge(width = 0.65)
#     )+
#     labs(
#       title = titulo,
#       x = "Estação",
#       y = "mg/L"
#     )+
#     geom_quasirandom(
#       size = 1.2,
#       alpha = .25,
#       width = .07,
#     )+
#     scale_y_continuous(
#       expand = expansion(mult = c(0,0)),
#       n.breaks = 11,
#       limits = c(-1,21)
#     )+
#     scale_x_discrete(limits = c("87398500",
#                                 "87398980",
#                                 "87398900",
#                                 "87398950",
#                                 "87405500",
#                                 "87406900",
#                                 "87409900"),
#                      labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
#     )+
#     geom_smooth(method = "lm",
#                 se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
#                 aes(group=1),
#                 alpha=.5,
#                 na.rm = TRUE,
#                 size = 1)
# }

# plan_wide_19902020 %>%
#   ggplot(
#     aes(CODIGO, `Oxigênio dissolvido`)
#   )+
#   geom_boxplot(
#       fill = '#F8F8FF',
#       color = "black",
#       outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
#       width= 0.7
#     )+
#   base_od("Oxigênio 1990")

Time for this code chunk to run: 0.0247759819030762

Oxigênio Dissolvido no período 1990-2000Time for this code chunk to run: 2.91411900520325

Time for this code chunk to run: 0.761914968490601

Time for this code chunk to run: 0.71396803855896

grid.arrange(od_p1, od_p2, od_p3, ncol = 3)

Oxigênio Dissolvido no período 1990-2020Time for this code chunk to run: 2.1074800491333

ggsave("od_p1.png",
       plot = od_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Removed 7 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing missing values (position_quasirandom).
ggsave("od_p2.png",
       plot = od_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 54 rows containing non-finite values (stat_boxplot).
## Warning: Removed 54 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 54 rows containing missing values (position_quasirandom).
ggsave("od_p3.png",
       plot = od_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 31 rows containing non-finite values (stat_boxplot).
## Warning: Removed 31 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 31 rows containing missing values (position_quasirandom).
ggsave("od_3periodos_2.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(od_p1, od_p2, od_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing missing values (position_quasirandom).
## Warning: Removed 54 rows containing non-finite values (stat_boxplot).
## Removed 54 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 54 rows containing missing values (position_quasirandom).
## Warning: Removed 31 rows containing non-finite values (stat_boxplot).
## Removed 31 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 31 rows containing missing values (position_quasirandom).

Time for this code chunk to run: 6.25169396400452

Time for this code chunk to run: 0.0084221363067627

Time for this code chunk to run: 0.840864896774292

Time for this code chunk to run: 0.734740972518921

Time for this code chunk to run: 0.705160856246948

grid.arrange(iqaod_p1, iqaod_p2, iqaod_p3, ncol = 3)

Time for this code chunk to run: 2.12542200088501

## # A tibble: 7 × 8
##   par    `87398500` `87398900` `87398950` `87398980` `87405500` 874069…¹ 87409…²
##   <chr>       <dbl>      <dbl>      <dbl>      <dbl>      <dbl>    <dbl>   <dbl>
## 1 min          0.8        2          2.5        4.2        0.1      0.1     0.1 
## 2 q1           4.9        5.6        4.4        6          1.9      0.25    1.4 
## 3 median       6.4        6.9        5.95       6.3        4.2      2.6     2.9 
## 4 mean         5.99       6.78       5.98       7.01       4.22     2.98    3.60
## 5 q3           7.3        8          7.1        8.2        6        5       5.65
## 6 max         10.8       10.5       10.3       12.1       19.9     10.2    11.1 
## 7 n          101        101         68         30         97       32      65   
## # … with abbreviated variable names ¹​`87406900`, ²​`87409900`
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500   0.4   3.5   4.9   5.01  6.65  10.9
## 2 87398900   1.9   4     5.5   5.33  6.6   12  
## 3 87398950   1.7   3.2   5.3   5.06  6.18   8.9
## 4 87398980   1.2   3.8   5.6   5.38  6.6    9.2
## 5 87405500   0.2   1.4   2.55  3.28  4     14.2
## 6 87406900   0     1.1   1.9   2.59  3.15  16  
## 7 87409900   0     0.7   2.3   3.12  3.7   10.6
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500  0.38 3.11    4.41  4.57  6.2   12.4
## 2 87398900  3.52 5.25    5.96  6.61  7.3   13.8
## 3 87398950  1.62 3.68    4.92  5.28  6.64  11.9
## 4 87398980  3.37 5.5     6.17  6.48  7.14  13.1
## 5 87405500  0.2  1.3     2.53  2.83  3.66   9.8
## 6 87406900  0.1  0.865   2.4   2.43  3.05   9.1
## 7 87409900  0.1  0.92    2.03  2.43  3.5    8.1

Time for this code chunk to run: 0.336023092269897

8.0.2 Demanda Bioquímica de Oxigênio

Time for this code chunk to run: 0.862489938735962

Time for this code chunk to run: 0.865778923034668

Time for this code chunk to run: 0.783010959625244

Time for this code chunk to run: 0.860697031021118

Time for this code chunk to run: 1.13407206535339

Time for this code chunk to run: 0.725468873977661

grid.arrange(dbo_p1, dbo_p2, dbo_p3, ncol = 3)

Time for this code chunk to run: 2.01562404632568

(sum_dbo_p1 <- plan_wide_19902020 %>%
   select(CODIGO, DBO, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(DBO, 
           na.rm = TRUE),
     q1 = 
       quantile(DBO, 0.25, 
                na.rm = TRUE),
     median = 
       median(DBO, 
              na.rm = TRUE),
     mean = 
       mean(DBO, 
            na.rm= TRUE),
     q3 = 
       quantile(DBO, 0.75, 
                na.rm = TRUE),
     max = 
       max(DBO, 
           na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500     1     1      2  1.86   2      13
## 2 87398900     1     1      1  1.52   2       6
## 3 87398950     1     1      1  1.66   2       6
## 4 87398980     1     1      1  1.13   1       2
## 5 87405500     1     2      3  5.37   5      64
## 6 87406900     1     4      5  9     11      26
## 7 87409900     2     3      4  6.97   9.5    31
(sum_dbo_p2 <- plan_wide_19902020 %>%
    select(CODIGO, DBO, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(DBO, 
            na.rm = TRUE),
      q1 = 
        quantile(DBO, 0.25, 
                 na.rm = TRUE),
      median = 
        median(DBO, 
               na.rm = TRUE),
      mean = 
        mean(DBO, 
             na.rm= TRUE),
      q3 = 
        quantile(DBO, 0.75, 
                 na.rm = TRUE),
      max = 
        max(DBO, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500     1     1      1  1.58   2       5
## 2 87398900     1     1      1  1.40   2       5
## 3 87398950     1     1      1  1.66   2       5
## 4 87398980     1     1      1  1.30   1       5
## 5 87405500     1     2      4  4.67   6.5    14
## 6 87406900     1     3      5  6.53   8      28
## 7 87409900     1     3      6  6.31   9      15
(sum_dbo_p3 <- plan_wide_19902020 %>%
    select(CODIGO, DBO, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(DBO, 
            na.rm = TRUE),
      q1 = 
        quantile(DBO, 0.25, 
                 na.rm = TRUE),
      median = 
        median(DBO, 
               na.rm = TRUE),
      mean = 
        mean(DBO, 
             na.rm= TRUE),
      q3 = 
        quantile(DBO, 0.75, 
                 na.rm = TRUE),
      max = 
        max(DBO, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500     1     1    1.5  2.15  3        7
## 2 87398900     1     1    1    1.51  2        5
## 3 87398950     1     1    2    2.65  2       18
## 4 87398980     1     1    1    1.32  2        2
## 5 87405500     1     3    4    5.28  6.25    21
## 6 87406900     1     3    5    6.58 10       24
## 7 87409900     1     3    4.5  6.18  8       18

Time for this code chunk to run: 0.287739038467407

ggsave("dbo_p1.png",
       plot = dbo_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 22 rows containing non-finite values (stat_boxplot).
## Removed 22 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 22 rows containing missing values (position_quasirandom).
ggsave("dbo_p2.png",
       plot = dbo_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 30 rows containing non-finite values (stat_boxplot).
## Removed 30 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 30 rows containing missing values (position_quasirandom).
ggsave("dbo_p3.png",
       plot = dbo_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Removed 8 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 8 rows containing missing values (position_quasirandom).
ggsave("dbo_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(dbo_p1, dbo_p2, dbo_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 22 rows containing non-finite values (stat_boxplot).
## Removed 22 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 22 rows containing missing values (position_quasirandom).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 30 rows containing non-finite values (stat_boxplot).
## Removed 30 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 30 rows containing missing values (position_quasirandom).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Removed 8 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 8 rows containing missing values (position_quasirandom).

Time for this code chunk to run: 5.60738205909729

8.0.3 Fósforo total

(ptot_p1<-ggplot(plan_wide_19902020%>% 
                   filter(ANO_COLETA>"1990" &
                             ANO_COLETA<="2000"),
                 aes(CODIGO,
                     `Fósforo total`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.15,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.1,
             ymax=0.15,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=0.1,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Fósforo total no período 1990-2000",
         x="Estação",
         y="mg/L")+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_y_continuous(expand = expansion(mult = c(0.03,0.03)),
                       n.breaks = 8,
                       limits = c(min(plan_wide_19902020$`Fósforo total`, na.rm = TRUE),
                                  max(plan_wide_19902020$`Fósforo total`), na.rm = TRUE),
                       trans = "log10")+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)

Time for this code chunk to run: 0.779021024703979

(ptot_p2 <- ggplot(plan_wide_19902020%>% 
                      filter(ANO_COLETA>"2000" &
                                ANO_COLETA<="2010"),
                   aes(CODIGO,
                       `Fósforo total`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.15,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.1,
             ymax=0.15,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=0.1,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Fósforo total no período 2000-2010",
         x="Estação",
         y="mg/L")+
   scale_y_continuous(expand = expansion(mult = c(0.03,0.03)),
                      n.breaks = 8,
                      limits = c(min(plan_wide_19902020$`Fósforo total`, na.rm = TRUE),
                                 max(plan_wide_19902020$`Fósforo total`), na.rm = TRUE),
                      trans = "log10")+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)

Time for this code chunk to run: 0.779571056365967

(ptot_p3 <- ggplot(plan_wide_19902020%>% 
                      filter(ANO_COLETA>"2010" &
                                ANO_COLETA<="2020"),
                   aes(CODIGO,
                       `Fósforo total`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.15,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.1,
             ymax=0.15,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=0.1,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Fósforo total no período 2010-2020",
         x="Estação",
         y="mg/L")+
    scale_y_continuous(expand = expansion(mult = c(0.03,0.03)),
                       n.breaks = 8,
                       limits = c(min(plan_wide_19902020$`Fósforo total`, na.rm = TRUE),
                                  max(plan_wide_19902020$`Fósforo total`), na.rm = TRUE),
                       trans = "log10")+
   ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)

Time for this code chunk to run: 0.690973997116089

grid.arrange(ptot_p1, ptot_p2, ptot_p3, ncol = 3)

Time for this code chunk to run: 3.34674596786499

(sum_ptot_p1 <- plan_wide_19902020 %>%
    select(CODIGO, `Fósforo total`, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(`Fósforo total`, na.rm = TRUE),
     q1 = 
       quantile(`Fósforo total`, 0.25, na.rm = TRUE),
     median = 
       median(`Fósforo total`, na.rm = TRUE),
     mean = 
       mean(`Fósforo total`, na.rm= TRUE),
     q3 = 
       quantile(`Fósforo total`, 0.75, na.rm = TRUE),
     max = 
       max(`Fósforo total`, na.rm = TRUE)))
## # A tibble: 7 × 7
##   CODIGO      min     q1 median   mean     q3   max
##   <chr>     <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1 87398500 0.0097 0.0593 0.0881 0.123  0.14   0.863
## 2 87398900 0.0023 0.0468 0.0678 0.0747 0.0883 0.247
## 3 87398950 0.0202 0.0544 0.0737 0.0751 0.0904 0.179
## 4 87398980 0.01   0.0254 0.0547 0.0708 0.114  0.189
## 5 87405500 0.017  0.171  0.281  0.417  0.492  2.32 
## 6 87406900 0.156  0.270  0.508  0.785  1.07   2.79 
## 7 87409900 0.107  0.258  0.384  0.489  0.712  1.53
(sum_ptot_p2 <- plan_wide_19902020 %>%
    select(CODIGO, `Fósforo total`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Fósforo total`, na.rm = TRUE),
      q1 = 
        quantile(`Fósforo total`, 0.25, na.rm = TRUE),
      median = 
        median(`Fósforo total`, na.rm = TRUE),
      mean = 
        mean(`Fósforo total`, na.rm= TRUE),
      q3 = 
        quantile(`Fósforo total`, 0.75, na.rm = TRUE),
      max = 
        max(`Fósforo total`, na.rm = TRUE)))
## # A tibble: 7 × 7
##   CODIGO      min     q1 median  mean    q3   max
##   <chr>     <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500 0.025  0.094   0.131 0.148 0.16  0.637
## 2 87398900 0.015  0.0764  0.104 0.140 0.164 0.646
## 3 87398950 0.036  0.116   0.171 0.180 0.207 0.485
## 4 87398980 0.0115 0.052   0.076 0.101 0.103 1    
## 5 87405500 0.046  0.261   0.406 0.547 0.681 1.98 
## 6 87406900 0.056  0.338   0.599 0.752 0.967 3.49 
## 7 87409900 0.043  0.325   0.624 0.677 0.989 1.57
(sum_ptot_p3 <- plan_wide_19902020 %>%
    select(CODIGO, `Fósforo total`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Fósforo total`, na.rm = TRUE),
      q1 = 
        quantile(`Fósforo total`, 0.25, na.rm = TRUE),
      median = 
        median(`Fósforo total`, na.rm = TRUE),
      mean = 
        mean(`Fósforo total`, na.rm= TRUE),
      q3 = 
        quantile(`Fósforo total`, 0.75, na.rm = TRUE),
      max = 
        max(`Fósforo total`, na.rm = TRUE)))
## # A tibble: 7 × 7
##   CODIGO     min     q1 median  mean    q3   max
##   <chr>    <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500 0.061 0.118   0.163 0.166 0.186 0.381
## 2 87398900 0.057 0.0935  0.130 0.163 0.168 0.444
## 3 87398950 0.07  0.132   0.156 0.292 0.221 3.11 
## 4 87398980 0.019 0.0625  0.106 0.144 0.170 0.59 
## 5 87405500 0.013 0.187   0.332 0.361 0.45  0.803
## 6 87406900 0.089 0.254   0.364 0.448 0.560 1.26 
## 7 87409900 0.203 0.259   0.369 0.488 0.564 1.7

Time for this code chunk to run: 0.717962980270386

ggsave("ptot_p1.png",
       plot = ptot_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 47 rows containing non-finite values (stat_boxplot).
## Removed 47 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 47 rows containing missing values (position_quasirandom).
ggsave("ptot_p2.png",
       plot = ptot_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 31 rows containing non-finite values (stat_boxplot).
## Removed 31 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 31 rows containing missing values (position_quasirandom).
ggsave("ptot_p3.png",
       plot = ptot_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 54 rows containing non-finite values (stat_boxplot).
## Removed 54 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 54 rows containing missing values (position_quasirandom).
ggsave("ptot_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(ptot_p1, ptot_p2, ptot_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 47 rows containing non-finite values (stat_boxplot).
## Removed 47 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 47 rows containing missing values (position_quasirandom).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 31 rows containing non-finite values (stat_boxplot).
## Removed 31 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 31 rows containing missing values (position_quasirandom).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 54 rows containing non-finite values (stat_boxplot).
## Removed 54 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 54 rows containing missing values (position_quasirandom).

Time for this code chunk to run: 7.27583384513855

8.0.4 Escherichia coli

(ecoli_p1 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"1990" &
                                 ANO_COLETA<="2000"),
                    aes(CODIGO,
                        `Escherichia coli`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=3200,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=800,
             ymax=3200,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=160,
             ymax=800,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=160,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Escherichia coli no período 1990-2000",
         x="Estação",
         y="NMP/100mL")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                      n.breaks = 9,
                      limits = c(min(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE),
                                 max(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)

Time for this code chunk to run: 0.920573949813843

(ecoli_p2 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"2000" &
                                 ANO_COLETA<="2010"),
                    aes(CODIGO,
                        `Escherichia coli`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=3200,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=800,
             ymax=3200,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=160,
             ymax=800,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=160,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Escherichia coli no período 2000-2010",
         x="Estação",
         y="NMP/100mL")+
    scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                       n.breaks = 9,
                       limits = c(min(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE),
                                  max(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE)),
                       trans = "log10",
                       labels = scales::number_format(accuracy = 1,
                                                      decimal.mark = ",",
                                                      big.mark = " "))+
   ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)

Time for this code chunk to run: 0.902156829833984

(ecoli_p3 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"2010" &
                                 ANO_COLETA<="2020"),
                    aes(CODIGO,
                        `Escherichia coli`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=3200,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=800,
             ymax=3200,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=160,
             ymax=800,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=160,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Escherichia coli no período 2010-2020",
         x="Estação",
         y="NMP/100mL")+
    scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                       n.breaks = 9,
                       limits = c(min(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE),
                                  max(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE)),
                       trans = "log10",
                       labels = scales::number_format(accuracy = 1,
                                                      decimal.mark = ",",
                                                      big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)

Time for this code chunk to run: 0.759981155395508

grid.arrange(ecoli_p1, ecoli_p2, ecoli_p3, ncol = 3)

Time for this code chunk to run: 2.19332003593445

(sum_ecoli_p1 <- plan_wide_19902020 %>%
    select(CODIGO, `Escherichia coli`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"1990" &
              ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(`Escherichia coli`, 
           na.rm = TRUE),
     q1 = 
       quantile(`Escherichia coli`, 0.25, 
                na.rm = TRUE),
     median = 
       median(`Escherichia coli`, 
              na.rm = TRUE),
     mean = 
       mean(`Escherichia coli`, 
            na.rm= TRUE),
     q3 = 
       quantile(`Escherichia coli`, 0.75, 
                na.rm = TRUE),
     max = 
       max(`Escherichia coli`, 
           na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median   mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>
## 1 87398500  32   136     240   854.    720 19200
## 2 87398900  16    68     160   548.    480  7760
## 3 87398950   2.4  12.8   268  4039.  10000 28000
## 4 87398980   4   160     243. 2907.    446 25600
## 5 87405500   1.6  12.8    24   545.    128 18400
## 6 87406900  13.6  61.6   192   718.    414 12800
## 7 87409900   2.4  12.8    64    97.7   128   720
(sum_ecoli_p2 <- plan_wide_19902020 %>%
    select(CODIGO, `Escherichia coli`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Escherichia coli`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Escherichia coli`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Escherichia coli`, 
               na.rm = TRUE),
      mean = 
        mean(`Escherichia coli`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Escherichia coli`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Escherichia coli`, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median   mean     q3    max
##   <chr>    <dbl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 87398500  21.6   91    150   1335.   308   27200
## 2 87398900  11     70    133.   444.   414.   2600
## 3 87398950  20    400    720    935.  1120    5500
## 4 87398980  24    110.   195    410.   289.   8800
## 5 87405500   4.7  162   2400  25445. 12950  490000
## 6 87406900   8    172  12800  66370. 62300  650000
## 7 87409900  16   7355. 35500  72440. 68750  460000
(sum_ecoli_p3 <- plan_wide_19902020 %>%
    select(CODIGO, `Escherichia coli`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Escherichia coli`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Escherichia coli`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Escherichia coli`, 
               na.rm = TRUE),
      mean = 
        mean(`Escherichia coli`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Escherichia coli`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Escherichia coli`, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO      min      q1 median    mean      q3      max
##   <chr>     <dbl>   <dbl>  <dbl>   <dbl>   <dbl>    <dbl>
## 1 87398500   90     155.    260     409.    451     2420 
## 2 87398900   10      52.8   107     245.    313     1553.
## 3 87398950  108.    250     487    1424.   1553.   10462 
## 4 87398980   40.8   140.    242.    529.    738.    2400 
## 5 87405500  632    8965   19232. 109992.  70750  1400000 
## 6 87406900 1440   23100   34500  230828. 140500  3400000 
## 7 87409900 2000   20100   38400   83128.  83680   345000

Time for this code chunk to run: 0.296850204467773

ggsave("ecoli_p1.png",
       plot = ecoli_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).
## Removed 15 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing missing values (position_quasirandom).
ggsave("ecoli_p2.png",
       plot = ecoli_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 66 rows containing non-finite values (stat_boxplot).
## Removed 66 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 66 rows containing missing values (position_quasirandom).
ggsave("ecoli_p3.png",
       plot = ecoli_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Removed 14 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing missing values (position_quasirandom).
ggsave("ecoli_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(ecoli_p1, ecoli_p2, ecoli_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).
## Removed 15 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing missing values (position_quasirandom).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 66 rows containing non-finite values (stat_boxplot).
## Removed 66 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 66 rows containing missing values (position_quasirandom).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Removed 14 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing missing values (position_quasirandom).

Time for this code chunk to run: 6.64437103271484

8.0.5 Nitrogênio amoniacal

(namon_p1 <- ggplot(plan_wide_19902020 %>% 
                      filter(ANO_COLETA>"1990" &
                               ANO_COLETA<="2000"),
                    aes(CODIGO,
                        `Nitrogênio total`))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=13.3,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=3.7,
             ymax=13.3,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=3.7,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
   labs(title = "Nitrogênio amoniacal no período 1990-2000",
        x="Estação",
        y="mg/L")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 9,
                      limits = c(min(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE),
                                 max(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = .001,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.742768049240112

(namon_p2 <- ggplot(plan_wide_19902020 %>% 
                      filter(ANO_COLETA>"2000" &
                               ANO_COLETA<="2010"),
                    aes(CODIGO,
                        `Nitrogênio total`))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=13.3,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=3.7,
            ymax=13.3,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=0,
            ymax=3.7,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Nitrogênio amoniacal no período 2000-2010",
        x="Estação",
        y="mg/L")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 9,
                      limits = c(min(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE),
                                 max(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = .001,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.740861177444458

(namon_p3 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"2010" &
                                 ANO_COLETA<="2020"),
                    aes(CODIGO,
                        `Nitrogênio total`))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=13.3,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=3.7,
            ymax=13.3,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=0,
            ymax=3.7,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Nitrogênio amoniacal no período 2010-2020",
        x="Estação",
        y="mg/L")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 9,
                      limits = c(min(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE),
                                 max(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = .001,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.723899841308594

grid.arrange(namon_p1, namon_p2, namon_p3, ncol = 3)

Time for this code chunk to run: 2.29036808013916

(sum_namon_p1 <- plan_wide_19902020 %>%
   select(CODIGO, `Nitrogênio total`, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(`Nitrogênio total`, 
           na.rm = TRUE),
     q1 = 
       quantile(`Nitrogênio total`, 0.25, 
                na.rm = TRUE),
     median = 
       median(`Nitrogênio total`, 
              na.rm = TRUE),
     mean = 
       mean(`Nitrogênio total`, 
            na.rm= TRUE),
     q3 = 
       quantile(`Nitrogênio total`, 0.75, 
                na.rm = TRUE),
     max = 
       max(`Nitrogênio total`, 
           na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500 0.44  0.842  1.00  1.22   1.34  3.81
## 2 87398900 0.22  0.82   1     1.09   1.25  4.86
## 3 87398950 0.51  0.83   1.02  1.06   1.19  2.16
## 4 87398980 0.549 0.68   0.755 0.872  1.01  1.85
## 5 87405500 0.51  1.53   2.94  5.27   6.77 21.6 
## 6 87406900 1.34  2.60   4.56  7.58  11.2  29.1 
## 7 87409900 0.5   1.98   4.29  5.18   7.01 19.6
(sum_namon_p2 <- plan_wide_19902020 %>%
    select(CODIGO, `Nitrogênio total`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Nitrogênio total`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Nitrogênio total`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Nitrogênio total`, 
               na.rm = TRUE),
      mean = 
        mean(`Nitrogênio total`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Nitrogênio total`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Nitrogênio total`, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500 0.18  0.885  0.992  1.80  1.46 23.2 
## 2 87398900 0.48  0.894  1.13   1.38  1.57  7.92
## 3 87398950 0.57  1.26   1.45   1.43  1.71  1.98
## 4 87398980 0.19  0.685  0.79   1.05  1.10  5.2 
## 5 87405500 0.968 2      3.29   5.45  6.60 21.7 
## 6 87406900 0.77  2.4    4.54   7.30 10.2  39.1 
## 7 87409900 1.62  2.5    6.97   7.92 10.6  21.5
(sum_namon_p3 <- plan_wide_19902020 %>%
    select(CODIGO, `Nitrogênio total`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Nitrogênio total`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Nitrogênio total`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Nitrogênio total`, 
               na.rm = TRUE),
      mean = 
        mean(`Nitrogênio total`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Nitrogênio total`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Nitrogênio total`, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500 0.222 0.89    1.11  1.24  1.41  2.56
## 2 87398900 0.095 0.883   1.02  1.29  1.40  4.25
## 3 87398950 0.612 1.04    1.43  1.90  2.06  9.5 
## 4 87398980 0.216 0.973   1.12  1.22  1.58  2.32
## 5 87405500 1.12  2.03    3.14  4.50  5.93 22.0 
## 6 87406900 1.37  2.40    5.58  6.47  7.58 25   
## 7 87409900 1.11  3       6.15  7.29  7.75 36

Time for this code chunk to run: 0.279345989227295

ggsave("namon_p1.png",
       plot = namon_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 102 rows containing non-finite values (stat_boxplot).
## Removed 102 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 102 rows containing missing values (position_quasirandom).
ggsave("namon_p2.png",
       plot = namon_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 110 rows containing non-finite values (stat_boxplot).
## Removed 110 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 110 rows containing missing values (position_quasirandom).
ggsave("namon_p3.png",
       plot = namon_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 70 rows containing non-finite values (stat_boxplot).
## Removed 70 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 70 rows containing missing values (position_quasirandom).
ggsave("namon_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(namon_p1, namon_p2, namon_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 102 rows containing non-finite values (stat_boxplot).
## Removed 102 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 102 rows containing missing values (position_quasirandom).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 110 rows containing non-finite values (stat_boxplot).
## Removed 110 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 110 rows containing missing values (position_quasirandom).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 70 rows containing non-finite values (stat_boxplot).
## Removed 70 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 70 rows containing missing values (position_quasirandom).

Time for this code chunk to run: 6.16957092285156

8.0.6 Turbidez

(turb_p1 <- ggplot(plan_wide_19902020 %>% 
                     filter(ANO_COLETA>"1990" &
                              ANO_COLETA<="2000"),
                   aes(CODIGO,
                       Turbidez))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=100,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=40,
            ymax=100,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=0,
            ymax=40,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Turbidez no período 1990-2000",
        x="Estação",
        y="UNT")+
   scale_y_continuous(expand = expansion(mult = c(0.05, 0.03)),
                      n.breaks = 8,
                      limits = c(min(plan_wide_19902020$Turbidez, na.rm = TRUE),
                                 max(plan_wide_19902020$Turbidez, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.830542087554932

(turb_p2 <- ggplot(plan_wide_19902020 %>% 
                     filter(ANO_COLETA>"2000" &
                              ANO_COLETA<="2010"),
                   aes(CODIGO,
                       Turbidez))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=100,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=40,
            ymax=100,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=0,
            ymax=40,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Turbidez no período 2000-2010",
        x="Estação",
        y="UNT")+
   scale_y_continuous(expand = expansion(mult = c(0.05, 0.03)),
                      n.breaks = 8,
                      limits = c(min(plan_wide_19902020$Turbidez, na.rm = TRUE),
                                 max(plan_wide_19902020$Turbidez, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.801146984100342

(turb_p3 <- ggplot(plan_wide_19902020 %>% 
                     filter(ANO_COLETA>"2010" &
                              ANO_COLETA<="2020"),
                   aes(CODIGO,
                       Turbidez))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=100,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=40,
            ymax=100,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=0,
            ymax=40,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Turbidez no período 2010-2020",
        x="Estação",
        y="UNT")+
   scale_y_continuous(expand = expansion(mult = c(0.05, 0.03)),
                      n.breaks = 8,
                      limits = c(min(plan_wide_19902020$Turbidez, na.rm = TRUE),
                                 max(plan_wide_19902020$Turbidez, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.741405010223389

grid.arrange(turb_p1, turb_p2, turb_p3, ncol = 3)

Time for this code chunk to run: 2.15637612342834

(sum_turb_p1 <- plan_wide_19902020 %>%
   select(CODIGO, Turbidez, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(Turbidez, 
           na.rm = TRUE),
     q1 = 
       quantile(Turbidez, 0.25, 
                na.rm = TRUE),
     median = 
       median(Turbidez, 
              na.rm = TRUE),
     mean = 
       mean(Turbidez, 
            na.rm= TRUE),
     q3 = 
       quantile(Turbidez, 0.75, 
                na.rm = TRUE),
     max = 
       max(Turbidez, 
           na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500   6.2  19     34.5  63.5  67     461
## 2 87398900   9    19     49.5  61.5  73.8   460
## 3 87398950   9.6  16     22    33.3  48.8   144
## 4 87398980  16    32.8   43    66.8  90.5   190
## 5 87405500   8.5  23.5   47    47.5  58     159
## 6 87406900  33    54.8   67    77.7  81.5   199
## 7 87409900   5.8  15     25    32.2  48      76
(sum_turb_p2 <- plan_wide_19902020 %>%
    select(CODIGO, Turbidez, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(Turbidez, 
            na.rm = TRUE),
      q1 = 
        quantile(Turbidez, 0.25, 
                 na.rm = TRUE),
      median = 
        median(Turbidez, 
               na.rm = TRUE),
      mean = 
        mean(Turbidez, 
             na.rm= TRUE),
      q3 = 
        quantile(Turbidez, 0.75, 
                 na.rm = TRUE),
      max = 
        max(Turbidez, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500     9  41.2   55.5  71.1  74.2   428
## 2 87398900    39  57     78   107.  116.    475
## 3 87398950    39  47     64    96.5  90     330
## 4 87398980    24  37     50    64.5  87     176
## 5 87405500    32  46     63.5  70.3  76     341
## 6 87406900    35  49     62    69.9  75.5   284
## 7 87409900    40  45     60    70.4  90     151
(sum_turb_p3 <- plan_wide_19902020 %>%
    select(CODIGO, Turbidez, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(Turbidez, 
            na.rm = TRUE),
      q1 = 
        quantile(Turbidez, 0.25, 
                 na.rm = TRUE),
      median = 
        median(Turbidez, 
               na.rm = TRUE),
      mean = 
        mean(Turbidez, 
             na.rm= TRUE),
      q3 = 
        quantile(Turbidez, 0.75, 
                 na.rm = TRUE),
      max = 
        max(Turbidez, 
            na.rm = TRUE))
) 
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500  8.52  16.4   29    33.3  43     85 
## 2 87398900 14.8   39.2   48.3  66.7  73.4  299 
## 3 87398950 16     29.9   41    51.6  65    230 
## 4 87398980 11     19.4   33.6  39.5  42.2  110.
## 5 87405500 10.0   29.0   41    42.9  54.5  131 
## 6 87406900  9.62  23     39    41.2  52    122 
## 7 87409900  9.68  22.0   34.0  40.5  47    182.

Time for this code chunk to run: 0.293633937835693

ggsave("turb_p1.png",
       plot = turb_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 56 rows containing non-finite values (stat_boxplot).
## Removed 56 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 56 rows containing missing values (position_quasirandom).
ggsave("turb_p2.png",
       plot = turb_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 74 rows containing non-finite values (stat_boxplot).
## Removed 74 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing missing values (position_quasirandom).
ggsave("turb_p3.png",
       plot = turb_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Removed 14 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing missing values (position_quasirandom).
ggsave("turb_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(turb_p1, turb_p2, turb_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 56 rows containing non-finite values (stat_boxplot).
## Removed 56 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 56 rows containing missing values (position_quasirandom).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 74 rows containing non-finite values (stat_boxplot).
## Removed 74 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing missing values (position_quasirandom).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Removed 14 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing missing values (position_quasirandom).

Time for this code chunk to run: 6.13875412940979

8.0.7 pH

(pH_p1 <- ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA>"1990" &
                            ANO_COLETA<="2000"),
                 aes(CODIGO,
                     pH))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=6,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=9,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=6,
            ymax=9,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "pH no período 1990-2000",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                      n.breaks = 8,
                      limits = c(4,11),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.892814874649048

(pH_p2 <- ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA>"2000" &
                            ANO_COLETA<="2010"),
                 aes(CODIGO,
                     pH))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=6,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=9,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=6,
            ymax=9,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "pH no período 2000-2010",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                      n.breaks = 8,
                      limits = c(4,11),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.801084041595459

(pH_p3 <- ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA>"2010" &
                            ANO_COLETA<="2020"),
                 aes(CODIGO,
                     pH))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=6,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=9,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=6,
            ymax=9,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "pH no período 2010-2020",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                      n.breaks = 8,
                      limits = c(4,11),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.85871696472168

grid.arrange(pH_p1, pH_p2, pH_p3, ncol = 3)

Time for this code chunk to run: 2.27271890640259

(sum_pH_p1 <- plan_wide_19902020 %>%
   select(CODIGO, pH, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(pH, 
           na.rm = TRUE),
     q1 = 
       quantile(pH, 0.25, 
                na.rm = TRUE),
     median = 
       median(pH, 
              na.rm = TRUE),
     mean = 
       mean(pH, 
            na.rm= TRUE),
     q3 = 
       quantile(pH, 0.75, 
                na.rm = TRUE),
     max = 
       max(pH, 
           na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500   5    6.18   6.59  6.51  6.82   7.9
## 2 87398900   5.2  6      6.3   6.33  6.63   7.9
## 3 87398950   5.4  6.29   6.4   6.49  6.72   8.1
## 4 87398980   5.3  5.93   6.2   6.16  6.3    7.3
## 5 87405500   5    6.3    6.4   6.47  6.7    9.3
## 6 87406900   5.5  6.18   6.45  6.43  6.8    7.3
## 7 87409900   4.5  6.2    6.4   6.44  6.7    7.4
(sum_pH_p2 <- plan_wide_19902020 %>%
    select(CODIGO, pH, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(pH, 
            na.rm = TRUE),
      q1 = 
        quantile(pH, 0.25, 
                 na.rm = TRUE),
      median = 
        median(pH, 
               na.rm = TRUE),
      mean = 
        mean(pH, 
             na.rm= TRUE),
      q3 = 
        quantile(pH, 0.75, 
                 na.rm = TRUE),
      max = 
        max(pH, 
            na.rm = TRUE))
) 
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500   5.3   6.3   6.6   6.59  6.88   7.9
## 2 87398900   5.5   6.4   6.65  6.63  6.9    7.5
## 3 87398950   6     6.6   6.8   6.89  7.25   7.6
## 4 87398980   5.8   6.3   6.5   6.63  7      7.5
## 5 87405500   5.2   6.4   6.6   6.68  6.9    8.3
## 6 87406900   5.5   6.4   6.7   6.66  6.9    8.6
## 7 87409900   5.8   6.5   6.8   6.77  7      8.4
(sum_pH_p3 <- plan_wide_19902020 %>%
    select(CODIGO, pH, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(pH, 
            na.rm = TRUE),
      q1 = 
        quantile(pH, 0.25, 
                 na.rm = TRUE),
      median = 
        median(pH, 
               na.rm = TRUE),
      mean = 
        mean(pH, 
             na.rm= TRUE),
      q3 = 
        quantile(pH, 0.75, 
                 na.rm = TRUE),
      max = 
        max(pH, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500  5.47  6.28   6.42  6.47  6.60  7.3 
## 2 87398900  5.68  6.36   6.5   6.57  6.84  7.4 
## 3 87398950  5.71  6.28   6.46  6.46  6.68  7   
## 4 87398980  5.42  6.10   6.36  6.39  6.6   7.2 
## 5 87405500  5.64  6.34   6.5   6.49  6.7   7.01
## 6 87406900  5.6   6.4    6.48  6.51  6.77  7.3 
## 7 87409900  5.59  6.46   6.6   6.57  6.76  7.2

Time for this code chunk to run: 0.265292882919312

ggsave("pH_p1.png",
       plot = pH_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Removed 1 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (position_quasirandom).
ggsave("pH_p2.png",
       plot = pH_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 73 rows containing non-finite values (stat_boxplot).
## Warning: Removed 73 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing missing values (position_quasirandom).
ggsave("pH_p3.png",
       plot = pH_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing missing values (position_quasirandom).
ggsave("pH_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(pH_p1, pH_p2, pH_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing missing values (position_quasirandom).
## Warning: Removed 73 rows containing non-finite values (stat_boxplot).
## Removed 73 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing missing values (position_quasirandom).
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Removed 14 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 14 rows containing missing values (position_quasirandom).

Time for this code chunk to run: 6.0416579246521

8.0.8 Sólidos totais

(SolTot_p1 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"1990" &
                                ANO_COLETA<="2000"),
                     aes(CODIGO,
                         `Sólidos totais`))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=500,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=500,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Sólidos totais no período 1990-2000",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$`Sólidos totais`, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.997112989425659

(SolTot_p2 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"2000" &
                                ANO_COLETA<="2010"),
                     aes(CODIGO,
                         `Sólidos totais`))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=500,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=500,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Sólidos totais no período 2000-2010",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$`Sólidos totais`, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)

Time for this code chunk to run: 0.844728946685791

(SolTot_p3 <- ggplot(plan_wide_19902020 %>% 
                        filter(ANO_COLETA>"2010" &
                                  ANO_COLETA<="2020"),
                     aes(CODIGO,
                         `Sólidos totais`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=500,
             ymax=Inf,
             alpha=1,
             fill="#eb5661")+ #classe 4
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=-Inf,
             ymax=500,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Sólidos totais no período 2010-2020",
         x="Estação",
         y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$`Sólidos totais`, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
   ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.707177877426147

grid.arrange(SolTot_p1, SolTot_p2, SolTot_p3, ncol = 3)

Time for this code chunk to run: 2.09621787071228

(sum_SolTot_p1 <- plan_wide_19902020 %>%
   select(CODIGO, `Sólidos totais`, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(`Sólidos totais`, 
           na.rm = TRUE),
     q1 = 
       quantile(`Sólidos totais`, 0.25, 
                na.rm = TRUE),
     median = 
       median(`Sólidos totais`, 
              na.rm = TRUE),
     mean = 
       mean(`Sólidos totais`, 
            na.rm= TRUE),
     q3 = 
       quantile(`Sólidos totais`, 0.75, 
                na.rm = TRUE),
     max = 
       max(`Sólidos totais`, 
           na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500    46  84.5   95   122.   120    510
## 2 87398900    18  74.5   97   111.   122.   474
## 3 87398950    10  76.5   91    90.9  106.   155
## 4 87398980    48  63.5   81.5 104.   126.   337
## 5 87405500    70 101    121   133.   151    361
## 6 87406900    89 118    155   165.   210    279
## 7 87409900    20  99.5  122   128.   143    381
(sum_SolTot_p2 <- plan_wide_19902020 %>%
    select(CODIGO, `Sólidos totais`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Sólidos totais`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Sólidos totais`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Sólidos totais`, 
               na.rm = TRUE),
      mean = 
        mean(`Sólidos totais`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Sólidos totais`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Sólidos totais`, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500    28  80     100  111.   123.   412
## 2 87398900    42  82     102. 128.   140.   489
## 3 87398950    46  94.2   108. 126.   127.   318
## 4 87398980    40  61      77   85.3   96    228
## 5 87405500    48 102     133  148.   170.   522
## 6 87406900    50 109     134. 154.   170.   670
## 7 87409900    56 112.    156  167.   190.   599
(sum_SolTot_p3 <- plan_wide_19902020 %>%
    select(CODIGO, `Sólidos totais`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Sólidos totais`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Sólidos totais`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Sólidos totais`, 
               na.rm = TRUE),
      mean = 
        mean(`Sólidos totais`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Sólidos totais`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Sólidos totais`, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500    61  69      90   82.8   96    101
## 2 87398900    41  77     104  120.   127    308
## 3 87398950    45  93     101  109.   117    221
## 4 87398980    55  62.8    80   79.9   95    109
## 5 87405500    83  89.2   108. 124.   162.   195
## 6 87406900    50 106     117  135.   163    246
## 7 87409900    75 103     115  131.   145    251

Time for this code chunk to run: 0.275954008102417

ggsave("SolTot_p1.png",
       plot = SolTot_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Removed 10 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 10 rows containing missing values (position_quasirandom).
ggsave("SolTot_p2.png",
       plot = SolTot_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing missing values (position_quasirandom).
ggsave("SolTot_p3.png",
       plot = SolTot_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 125 rows containing non-finite values (stat_boxplot).
## Warning: Removed 125 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 125 rows containing missing values (position_quasirandom).
ggsave("SolTot_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(SolTot_p1, SolTot_p2, SolTot_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 10 rows containing missing values (position_quasirandom).
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Removed 7 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 7 rows containing missing values (position_quasirandom).
## Warning: Removed 125 rows containing non-finite values (stat_boxplot).
## Removed 125 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 125 rows containing missing values (position_quasirandom).

Time for this code chunk to run: 5.81974291801453

8.0.9 IQA

Time for this code chunk to run: 0.833961009979248

Time for this code chunk to run: 0.79085898399353

Time for this code chunk to run: 0.681491851806641

grid.arrange(iqa_p1, iqa_p2, iqa_p3, ncol = 3)

Time for this code chunk to run: 1.88271999359131

(sum_IQA_p1 <- plan_wide_19902020 %>%
   select(CODIGO, IQA, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(IQA, 
           na.rm = TRUE),
     q1 = 
       quantile(IQA, 0.25, 
                na.rm = TRUE),
     median = 
       median(IQA, 
              na.rm = TRUE),
     mean = 
       mean(IQA, 
            na.rm= TRUE),
     q3 = 
       quantile(IQA, 0.75, 
                na.rm = TRUE),
     max = 
       max(IQA, 
           na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500  27.0  35.7   40.9  40.7  46.2  52.2
## 2 87398900  27.8  37.9   42.9  43.0  48.0  58.5
## 3 87398950  32.8  36.8   41.4  43.2  48.6  61.9
## 4 87398980  29.2  35.8   40.4  40.3  44.8  51.9
## 5 87405500  24.8  34.9   41.2  40.3  46.9  57.6
## 6 87406900  24.7  31.3   37.8  37.4  44.4  49.0
## 7 87409900  23.6  31.9   37.1  38.8  46.2  55.4
(sum_IQA_p2 <- plan_wide_19902020 %>%
    select(CODIGO, IQA, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(IQA, 
            na.rm = TRUE),
      q1 = 
        quantile(IQA, 0.25, 
                 na.rm = TRUE),
      median = 
        median(IQA, 
               na.rm = TRUE),
      mean = 
        mean(IQA, 
             na.rm= TRUE),
      q3 = 
        quantile(IQA, 0.75, 
                 na.rm = TRUE),
      max = 
        max(IQA, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500  27.8  34.6   40.0  39.5  43.5  48.7
## 2 87398900  28.5  35.1   37.6  38.3  40.6  48.5
## 3 87398950  21.1  29.4   32.7  32.8  36.8  44.0
## 4 87398980  24.5  35.7   39.4  39.5  43.4  52.1
## 5 87405500  19.8  28.7   31.5  31.9  35.7  48.8
## 6 87406900  17.1  25.3   29.0  29.5  32.8  44.1
## 7 87409900  16.2  20.5   26.1  25.0  29.8  33.1
(sum_IQA_p3 <- plan_wide_19902020 %>%
    select(CODIGO, IQA, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(IQA, 
            na.rm = TRUE),
      q1 = 
        quantile(IQA, 0.25, 
                 na.rm = TRUE),
      median = 
        median(IQA, 
               na.rm = TRUE),
      mean = 
        mean(IQA, 
             na.rm= TRUE),
      q3 = 
        quantile(IQA, 0.75, 
                 na.rm = TRUE),
      max = 
        max(IQA, 
            na.rm = TRUE),
      n = 
        length(IQA))
)
## # A tibble: 7 × 8
##   CODIGO     min    q1 median  mean    q3   max     n
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <int>
## 1 87398500  40.2  42.5   45.4  44.2  45.5  47.2    34
## 2 87398900  34.1  38.6   41.2  40.2  42.9  44.4    36
## 3 87398950  36.7  39.5   42.4  41.5  44.4  44.6    35
## 4 87398980  40.0  40.0   40.0  40.0  40.0  40.0    28
## 5 87405500  30.8  31.6   32.5  32.5  33.3  34.1    33
## 6 87406900  22.9  24.4   25.9  25.3  26.5  27.2    35
## 7 87409900  24.1  25.1   27.3  26.9  28.2  29.7    37

Time for this code chunk to run: 0.28761887550354

ggsave("iqa_p1.png",
       plot = iqa_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 164 rows containing missing values (position_quasirandom).
ggsave("iqa_p2.png",
       plot = iqa_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 253 rows containing missing values (position_quasirandom).
ggsave("iqa_p3.png",
       plot = iqa_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 214 rows containing missing values (position_quasirandom).
ggsave("iqa_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(iqa_p1, iqa_p2, iqa_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 164 rows containing missing values (position_quasirandom).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 253 rows containing missing values (position_quasirandom).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 214 rows containing missing values (position_quasirandom).

Time for this code chunk to run: 5.571044921875

8.1 Testando coisas

8.1.1 Correlação

parametros_IQA <- plan_wide_19902020 %>%
  select(CODIGO,
         pH,
         DBO,
         `Nitrogênio amoniacal`,
         `Nitrogênio total`,
         `Fósforo total`,
         `Temperatura água`,
         Turbidez,
         `Sólidos totais`,
         `Oxigênio dissolvido`,
         Condutividade)

write.csv(parametros_IQA,
          "./parametros_IQA.csv",
          row.names = FALSE)

parametros_IQA %>% 
  select(-CODIGO) %>% 
  ggcorr(method = "complete.obs",
           # "pearson",
           # "pairwise",
         name = "Correlação",
         label = TRUE,
         label_alpha = TRUE,
         digits = 3,
         low = "#3B9AB2",
         mid = "#EEEEEE",
         high = "#F21A00",
         # palette = "RdYlBu",
         layout.exp = 0,
         legend.position = "left",
         label_round = 3,
         )

# Gráfico das correlações entre todos os parâmetros com significância
# correl_IQA <- parametros_IQA %>% 
#   select(-CODIGO) %>% 
#   ggpairs(title = "Correlação entre parâmetros que compõem o IQA",
#           axisLabels = "show")

Time for this code chunk to run: 0.589664936065674

8.1.2 Condutividade elétrica

(cond_elet_p1 <- ggplot(plan_wide_19902020 %>% 
                          filter(ANO_COLETA>"1990" &
                                   ANO_COLETA<="2000"),
                        aes(CODIGO,
                            Condutividade))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=500,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=500,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Condutividade elétrica no período 1990-2000",
        x="Estação",
        y="µmhos/cm")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$Condutividade, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.743114948272705

(cond_elet_p2 <- ggplot(plan_wide_19902020 %>% 
                          filter(ANO_COLETA>"2000" &
                                   ANO_COLETA<="2010"),
                        aes(CODIGO,
                            Condutividade))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=500,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=500,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Condutividade elétrica no período 2000-2010",
        x="Estação",
        y="µmhos/cm")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$Condutividade, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.718693971633911

(cond_elet_p3 <- ggplot(plan_wide_19902020 %>% 
                          filter(ANO_COLETA>"2010" &
                                   ANO_COLETA<="2020"),
                        aes(CODIGO,
                            Condutividade))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=500,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=500,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Condutividade elétrica no período 2010-2020",
        x="Estação",
        y="µmhos/cm")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$Condutividade, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)

Time for this code chunk to run: 0.638324975967407

grid.arrange(cond_elet_p1, cond_elet_p2, cond_elet_p3, ncol = 3)

Time for this code chunk to run: 1.87105894088745

(sum_cond_elet_p1 <- plan_wide_19902020 %>%
   select(CODIGO, Condutividade, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(Condutividade, 
           na.rm = TRUE),
     q1 = 
       quantile(Condutividade, 0.25, 
                na.rm = TRUE),
     median = 
       median(Condutividade, 
              na.rm = TRUE),
     mean = 
       mean(Condutividade, 
            na.rm= TRUE),
     q3 = 
       quantile(Condutividade, 0.75, 
                na.rm = TRUE),
     max = 
       max(Condutividade, 
           na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500   9.4  51.1   67    75.1  83.2 340  
## 2 87398900  10    41.5   51    55.3  64.2 160  
## 3 87398950   9    41.5   51.5  60.1  69.5 160  
## 4 87398980  11.3  42.4   52.0  53.0  67.0  83.8
## 5 87405500  25    68.7   88.2 130.  170   560  
## 6 87406900  52    88.4  133.  193.  256.  576  
## 7 87409900  29    80    110.  134.  168.  460
(sum_cond_elet_p2 <- plan_wide_19902020 %>%
    select(CODIGO, Condutividade, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(Condutividade, 
            na.rm = TRUE),
      q1 = 
        quantile(Condutividade, 0.25, 
                 na.rm = TRUE),
      median = 
        median(Condutividade, 
               na.rm = TRUE),
      mean = 
        mean(Condutividade, 
             na.rm= TRUE),
      q3 = 
        quantile(Condutividade, 0.75, 
                 na.rm = TRUE),
      max = 
        max(Condutividade, 
            na.rm = TRUE))
)
## # A tibble: 7 × 7
##   CODIGO     min    q1 median  mean    q3   max
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 87398500  11.9  67.0   82.6  84.8 102.   164.
## 2 87398900  11    44.4   52.3  57.1  72.6  136.
## 3 87398950  39.8  58.4   76    82.3  98.3  160 
## 4 87398980   9.4  42.4   49.7  51.5  62    114.
## 5 87405500  17    77.5  107   142.  171.   679 
## 6 87406900  23.1  85.6  124.  164.  199.   619 
## 7 87409900  56.1 114.   177   200.  242    454
(sum_cond_elet_p3 <- plan_wide_19902020 %>%
    select(CODIGO, Condutividade, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(Condutividade, 
            na.rm = TRUE),
      q1 = 
        quantile(Condutividade, 0.25, 
                 na.rm = TRUE),
      median = 
        median(Condutividade, 
               na.rm = TRUE),
      mean = 
        mean(Condutividade, 
             na.rm= TRUE),
      q3 = 
        quantile(Condutividade, 0.75, 
                 na.rm = TRUE),
      max = 
        max(Condutividade, 
            na.rm = TRUE),
      n = 
        length(Condutividade))
)
## # A tibble: 7 × 8
##   CODIGO     min    q1 median  mean    q3   max     n
##   <chr>    <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <int>
## 1 87398500  0.01  68.5   80.2  80.4  99.5 125.     34
## 2 87398900 39.7   53.4   58.3  61.1  65.5 103      36
## 3 87398950 40.9   64.7   70.1  76.1  82.5 195.     35
## 4 87398980 43.2   51.7   54.0  56.3  61.0  78.9    28
## 5 87405500 47     85.8  121.  146.  209.  286      33
## 6 87406900 62.7   95.9  142.  163.  216.  354.     35
## 7 87409900 65.7  121.   159.  179.  245.  498.     37
# plan_wide_19902020 %>% 
#    select(CODIGO, IQA) %>% 
#    group_by(CODIGO) %>% 
#    summarize(
#       min = 
#          min(IQA, 
#              na.rm = TRUE),
#       q1 = 
#          quantile(IQA, 0.25, 
#                   na.rm = TRUE),
#       median = 
#          median(IQA, 
#                 na.rm = TRUE),
#       mean = 
#          mean(IQA, 
#               na.rm= TRUE),
#       q3 = 
#          quantile(IQA, 0.75, 
#                   na.rm = TRUE),
#       max = 
#          max(IQA, 
#              na.rm = TRUE))

Time for this code chunk to run: 0.252032995223999

ggsave("cond_elet_p1.png",
       plot = cond_elet_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).
## Removed 15 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing missing values (position_quasirandom).
ggsave("cond_elet_p2.png",
       plot = cond_elet_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 37 rows containing non-finite values (stat_boxplot).
## Warning: Removed 37 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 37 rows containing missing values (position_quasirandom).
ggsave("cond_elet_p3.png",
       plot = cond_elet_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Saving 10 x 6.66 in image
## Warning: Removed 25 rows containing non-finite values (stat_boxplot).
## Warning: Removed 25 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 25 rows containing missing values (position_quasirandom).
ggsave("cond_elet_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(cond_elet_p1, cond_elet_p2, cond_elet_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing missing values (position_quasirandom).
## Warning: Removed 37 rows containing non-finite values (stat_boxplot).
## Removed 37 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 37 rows containing missing values (position_quasirandom).
## Warning: Removed 25 rows containing non-finite values (stat_boxplot).
## Removed 25 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 25 rows containing missing values (position_quasirandom).

Time for this code chunk to run: 5.95157909393311

---
title: "TCC"
author: "Leonardo Fernandes Wink"
date: "`r format(Sys.time(), '%d/%m/%Y')`"
output:
  html_document: 
    highlight: haddock
    keep_md: yes
    number_sections: yes
    theme: flatly
    toc: yes
    toc_float:
      collapsed: no
      smooth_scroll: no
    fig_width: 10
    fig_height: 6.66
    fig_caption: yes
    code_download: true
  pdf_document:
    toc: yes
  word_document: 
    toc: yes
    keep_md: yes
  github_document:
    html_preview: true
always_allow_html: yes
editor_options: 
  chunk_output_type: console
fig.align: center
---

```{r Rotina pra toda vez que abrir o documento, echo = FALSE}
# Abrir o GitHub Desktop
# Verificar se há pull pra ser feito
# Abrir o RStudio
```

# Brief explanation

Every boxplot means a monitoring point (Ponto de monitoramento (or PM) in portuguese). My goal here is to analyze the evolution between decades of each water quality parameter that compounds the Water Quality Index (WQI).

The river flows in the east-west direction as shown in the image below.

![](images/paste-7AD7027F.png)

The logic behind the sorting in the boxplots is because of 2 main reasons:

1.  The original monitoring point isn't easy to understand (8 digits, like 87409900)
2.  Changing the original nomenclature to PM1, PM2 (...) makes it easier to understand that the last point has water contributions of every other point upstream.

Some features that I want to add:
*  If the parameter is x, then use x's classes (with its own classes background color plotted)
*  Define the timescale, should act just like a filter

```{r p1 example}
# plan_wide_19902020 %>%
#   filter(ANO_COLETA > "1990" &
#          ANO_COLETA <= "2000")
```

# Anotações de coisas por fazer:

-   Descobrir como colocar as estações no sentido correto montante -\> jusante nos sumários

> 87398500, 87398980, 87398900, 87398950, 87405500, 87406900, 87409900

-   ~~Aprender a segmentar o meu dataset por períodos~~
-   aprender a criar uma nova coluna com a segmentação dos períodos
-   maybe use `~facet.grid`
-   aprender a colocar a legenda dentro do gráfico
    -   reduzir o tamanho da legenda
-   ~~corrigir os valores 0 de IQA pra NA~~
-   descobrir como conseguir a equação do lm
-   ~~aprender a pivotar o sumário~~ -\> meu sumário do google docs ta batendo direitinho com o do R
-   descobrir se há outros TCCs com disponibilização de códigos
-   `Namon` tá com com casa decimal `","` e `ptot` tá com `"."`
-   correlação forte entre condutividade e Namon/Ptot/DBO

| 1990-2000 | 2000-2010 | 2010-2020 |
|:---------:|:---------:|:---------:|
| 1990-2000 | 2000-2010 | 2010-2020 |

# Instalar os pacotes

```{r instalar pacotes}
# install.packages(tidyverse)
```

## acessar os pacotes

```{r Acessar os pacotes, message = FALSE, warning = TRUE}
# library(readr)
# library(rmarkdown)
# # library(qboxplot)
# library(readxl)
# library(pillar)
# library(dplyr)
# library(tidyverse)
# library(gapminder)
# library(knitr)
# library(kableExtra)
# library(ggpubr)
# library(gridExtra)
# library(modelsummary)
# library(gtsummary)
# library(GGally)
pacman::p_load(readr, rmarkdown, readxl,
               pillar, dplyr, tidyverse,
               gapminder, knitr, kableExtra,
               gridExtra, #modelsummary, 
               gtsummary, ggplot2,
               ggbeeswarm, GGally)
# pacman::p_load(tibbletime)
```

```{r cronometrando quanto tempo cada chunk leva}
knitr::knit_hooks$set(time_it = local({
   now <- NULL
   function(before, options) {
      if (before) {
         # record the current time before each chunk
         now <<- Sys.time()
      } else {
         # calculate the time difference after a chunk
         res <- difftime(Sys.time(), now)
         # return a character string to show the time
         paste("Time for this code chunk to run:", res)
      }
   }
}))

knitr::opts_chunk$set(time_it = TRUE)
```

## importando a planilha

```{r Importando a planilha, echo = FALSE, message = TRUE, warning = FALSE}
plan_wide_19902020 <- read_delim("https://raw.githubusercontent.com/leonardofwink/TCC_gh/main/plan_wide_19902020.tsv",
                                 delim = "\t", 
                                 escape_double = FALSE,
                                 col_types = cols(
                                    Alcalinidade = col_double(),
                                    CODIGO = col_character(), 
                                    COORD_GEO_LAT_GRAU = col_double(),
                                    COORD_GEO_LONG_GRAU = col_double(),
                                    DATA_COLETA = col_date(format = "%d/%m/%Y"),
                                    Nitrato = col_double(), 
                                    Nitrito = col_double(),
                                    SDT = col_double(), 
                                    SST = col_double(),
                                    `Vazao` = col_double(), 
                                    `Vazao rio` = col_double()
                                 ),
                                 locale = locale(
                                    date_names = "pt", 
                                    decimal_mark = ",",
                                    grouping_mark = ""
                                 ),
                                 trim_ws = TRUE
)

# teste[~'2000']
# 
# teste <- plan_wide_19902020 %>%
#   dplyr::filter(DATA_COLETA >= as.POSIXct("2010-01-01")) #this works
# 
# teste$DATA_COLETA <- as.POSIXct(teste$DATA_COLETA)
# 
# teste %>% 
#   dplyr::arrange(DATA_COLETA)
# teste %>% 
#   filter_time(time_formula = '2013-01-01' ~ '2020-12-31')
# 
# 
# typeof(teste$DATA_COLETA)
# 
#   as_tbl_time(plan_wide_19902020, index = DATA_COLETA)
# str(plan_wide_19902020$DATA_COLETA)
```

```{r Visualização da planilha importada, echo = FALSE}
paged_table(plan_wide_19902020,
            options = list(rows.print = 15,
                           cols.print = 10))
```

# data wrangling

```{r data wrangling}
# Como há dados faltantes, no cálculo entre o produto das colunas, ele acaba interpretando como se fosse zero, mas na verdade é NA
plan_wide_19902020 <- plan_wide_19902020 %>% 
   mutate(IQA = ifelse(IQA == 0, NA, IQA))
```

```{r Códigos Git, echo = FALSE}
# cd myrepo
# ls
# head README.md
# git status
# git add README.md
# git commit -m "A commit from my local computer"
# 
# cd .. # voltar pro diretório acima
# rm -rf myrepo/ #remover/apagar a pasta myrepo
```

```{r Aprendendo Git, echo = FALSE}
# slides da bia que ajudam mt
# https://beatrizmilz.github.io/slidesR/git_rstudio/11-2021-ENCE.html#20
# aprendendo a sincronizar usando esse guia -> 
# https://happygitwithr-com.translate.goog/push-pull-github.html?_x_tr_sl=auto&_x_tr_tl=pt&_x_tr_hl=pt-BR
# library(usethis)
# usethis::create_github_token() criar um código pra acesso e sincronização between R e github

# gitcreds::gitcreds_set() 
# 
# use_git_config(user.name = "leonardofwink",
#                user.email = "leonardofwink@gmail.com")
# usethis::gh_token_help()

# Como mostrar os dados de um arquivo via Git/GitHub
# git clone https://github.com/leonardofwink/myrepo.git
# cd myrepo #acessa a pasta myrepo
# ls #lista os arquivos da pasta 
# head README.md #mostra as primeiras observações do arquivo

# Como mostrar os dados de um arquivo via R
# head(C:/Users/Léo/myrepo/README.md)

# Adicionar uma linha ao README.md e verificar se o Git percebe a mudança
# echo "A line I wrote on my local computer" >> README.md
# git status
## C:\Users\Léo\myrepo>git status
## On branch main
## Your branch is up to date with 'origin/main'.
## 
## Changes not staged for commit:
##   (use "git add <file>..." to update what will be committed)
##   (use "git restore <file>..." to discard changes in working directory)
##         **modified:   README.md**
## 
## no changes added to commit (use "git add" and/or "git commit -a")
```

# setting theme

```{r setting theme}
theme_grafs <- function(bg = "white", 
                        coloracao_letra = "black") {
   theme(
      plot.title = element_text(
         hjust = 0.5,
         color = coloracao_letra,
         size = 19),
      
      axis.title.x = 
         # element_text(
         # color = coloracao_letra,
         # size = 15,
         # angle = 0,),
         element_blank(),
      axis.title.y = element_text(
         color = coloracao_letra,
         size = 15,
         angle = 90),
      
      axis.text.x = element_text(
         color = coloracao_letra,
         size = 17),
      axis.text.y = element_text(
         color = coloracao_letra,
         size = 17,
         angle = 0),
      
      panel.background = element_rect(fill = bg),
      plot.background = element_rect(fill = bg),
      plot.margin = margin(l = 5, r = 10,
                           b = 5, t = 5)
   )
}
```

# setting different timescales

```{r setting periodos, echo = FALSE}
# p1 <- plan_wide_19902020 %>% 
#   filter(ANO_COLETA > "1990" &
#            ANO_COLETA <= "2000")
# 
# p2 <- plan_wide_19902020 %>% 
#   filter(ANO_COLETA > "2000" &
#            ANO_COLETA <= "2010")
# 
# p3 <- plan_wide_19902020 %>% 
#   filter(ANO_COLETA > "2010" &
#            ANO_COLETA <= "2020")

# teste_all_periodos <- plan_wide_19902020 %>% 
#   filter(
#     between(ANO_COLETA, 1990, 2000)
#   )
```

# setting sumaries

```{r Sumários, echo = FALSE}
# plan_wide_19902020 %>%
#   as_tibble() %>% 
#   filter(ANO_COLETA > "1990" &
#            ANO_COLETA <= "2000") %>% 
#   select(colnames(parametros_IQA)) %>% 
#   group_by(CODIGO) %>% 
#   group_by(colnames(parametros_IQA)) %>% 
#   summarise_each(
#     funs( 
#       min = 
#         min(., 
#             na.rm = TRUE),
#       q1 = 
#         quantile(., 0.25, 
#                  na.rm = TRUE),
#       median = 
#         median(., 
#                na.rm = TRUE),
#       mean = 
#         mean(., 
#              na.rm= TRUE),
#       q3 = 
#         quantile(., 0.75, 
#                  na.rm = TRUE),
#       max = 
#         max(., 
#             na.rm = TRUE),
#       n = 
#         length(.)
#     )
#   ) %>% 
#   pivot_longer(
#        !CODIGO,
#        names_to = "parametro",
#        values_to = "valor"
#     ) %>% 
#     pivot_wider(names_from = CODIGO,
#                 values_from = valor) %>% 
#   group_by(parametro)



# p2 <- plan_wide_19902020 %>%
#   filter(ANO_COLETA > "2000" &
#          ANO_COLETA <= "2010")
# 
# p3 <- plan_wide_19902020 %>%
#   filter(ANO_COLETA > "2010" &
#          ANO_COLETA <= "2020")

# periodo = c(p1 <- plan_wide_19902020 %>% 
#   filter(ANO_COLETA > "1990" &
#            ANO_COLETA <= "2000"),
# 
# p2 <- plan_wide_19902020 %>%
#   filter(ANO_COLETA > "2000" &
#            ANO_COLETA <= "2010"),
# 
# p3 <- plan_wide_19902020 %>%
#   filter(ANO_COLETA > "2010" &
#            ANO_COLETA <= "2020"))

# sumario <- function(parametros = parametros, periodo){
#   plan_wide_19902020 %>%
#    select(CODIGO, ., ANO_COLETA) %>% 
#    # filter(ANO_COLETA>"1990" &
#    #          ANO_COLETA<="2000") %>% 
#    group_by(CODIGO) %>% 
#    summarize(
#      min = 
#        min(parametros, 
#            na.rm = TRUE),
#      q1 = 
#        quantile(parametros, 0.25, 
#                 na.rm = TRUE),
#      median = 
#        median(parametros, 
#               na.rm = TRUE),
#      mean = 
#        mean(parametros, 
#             na.rm= TRUE),
#      q3 = 
#        quantile(parametros, 0.75, 
#                 na.rm = TRUE),
#      max = 
#        max(parametros, 
#            na.rm = TRUE))
# }

# plan_wide_19902020 %>% 
#   sumario(parametros = DBO)

# sum_IQA_p1 <- plan_wide_19902020 %>%
#    select(CODIGO, IQA, ANO_COLETA) %>% 
#    filter(ANO_COLETA>"1990" &
#             ANO_COLETA<="2000") %>% 
#    group_by(CODIGO) %>% 
#    summarize(
#      min = 
#        min(IQA, 
#            na.rm = TRUE),
#      q1 = 
#        quantile(IQA, 0.25, 
#                 na.rm = TRUE),
#      median = 
#        median(IQA, 
#               na.rm = TRUE),
#      mean = 
#        mean(IQA, 
#             na.rm= TRUE),
#      q3 = 
#        quantile(IQA, 0.75, 
#                 na.rm = TRUE),
#      max = 
#        max(IQA, 
#            na.rm = TRUE))
```

# Parâmetros físico-químicos

### Oxigênio Dissolvido

```{r setting base od}
# par_od <- plan_wide_19902020 %>%
#   select(CODIGO, `Oxigênio dissolvido`) %>%
#   group_nest(CODIGO)

# data %>%  o que o Pat fez no CC156 21min56s
#   highlight_key(., ~) %>% 
#   ggplot()
# oxig_p1 <- p1 %>% 
#   select(CODIGO, `Oxigênio dissolvido`)
# 
# par_od <- plan_wide_19902020 %>% 
#   select(CODIGO, ) %>% 
#   group_by(CODIGO)

# parametros_IQA

# parametros <- colnames(parametros_IQA)

# base_od <- function(titulo = "Título") {
#   annotate("rect",
#            xmin = -Inf, xmax = Inf,
#            ymin = -Inf, ymax = 2,
#            alpha = 1,
#            fill = "#ac5079")+ # >pior classe
#     annotate("rect",
#              xmin = -Inf, xmax = Inf,
#              ymin = 2, ymax = 4,
#              alpha = 1,
#              fill = "#eb5661")+ #classe 4
#     annotate("rect",
#              xmin = -Inf, xmax = Inf,
#              ymin = 4, ymax = 5,
#              alpha=1,
#              fill="#fcf7ab")+ #classe 3
#     annotate("rect",
#              xmin=-Inf,
#              xmax=Inf,
#              ymin=5,
#              ymax=6,
#              alpha=1,
#              fill="#70c18c")+ #classe 2
#     annotate("rect",
#              xmin=-Inf,
#              xmax=Inf,
#              ymin=6,
#              ymax=Inf,
#              alpha=1,
#              fill="#8dcdeb")+ #classe 1
#     stat_boxplot(
#       geom = 'errorbar',
#       width=0.3,
#       position = position_dodge(width = 0.65)
#     )+
#     labs(
#       title = titulo,
#       x = "Estação",
#       y = "mg/L"
#     )+
#     geom_quasirandom(
#       size = 1.2,
#       alpha = .25,
#       width = .07,
#     )+
#     scale_y_continuous(
#       expand = expansion(mult = c(0,0)),
#       n.breaks = 11,
#       limits = c(-1,21)
#     )+
#     scale_x_discrete(limits = c("87398500",
#                                 "87398980",
#                                 "87398900",
#                                 "87398950",
#                                 "87405500",
#                                 "87406900",
#                                 "87409900"),
#                      labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
#     )+
#     geom_smooth(method = "lm",
#                 se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
#                 aes(group=1),
#                 alpha=.5,
#                 na.rm = TRUE,
#                 size = 1)
# }

# plan_wide_19902020 %>%
#   ggplot(
#     aes(CODIGO, `Oxigênio dissolvido`)
#   )+
#   geom_boxplot(
#       fill = '#F8F8FF',
#       color = "black",
#       outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
#       width= 0.7
#     )+
#   base_od("Oxigênio 1990")


```

```{r Gráfico OD periodo 1, echo = FALSE, warning=FALSE, message = FALSE, fig.cap="Oxigênio Dissolvido no período 1990-2000"}
(od_p1 <- ggplot(plan_wide_19902020 %>% 
                    filter(ANO_COLETA > "1990" &
                              ANO_COLETA <= "2000"),
                 aes(CODIGO,
                     `Oxigênio dissolvido`)
)+
   annotate("rect",
            xmin = -Inf, xmax = Inf,
            ymin = -Inf, ymax = 2,
            alpha = 1,
            fill = "#ac5079")+ #>pior classe
   annotate("rect",
            xmin = -Inf, xmax = Inf,
            ymin = 2, ymax = 4,
            alpha = 1,
            fill = "#eb5661")+ #classe 4
   annotate("rect",
            xmin = -Inf, xmax = Inf,
            ymin = 4, ymax = 5,
            alpha = 1,
            fill = "#fcf7ab")+ #classe 3
   annotate("rect",
            xmin = -Inf, xmax = Inf,
            ymin = 5, ymax = 6,
            alpha = 1,
            fill = "#70c18c")+ #classe 2
   annotate("rect",
            xmin = -Inf, xmax = Inf,
            ymin= 6, ymax = Inf,
            alpha = 1,
            fill = "#8dcdeb")+ #classe 1
   stat_boxplot(
      geom = 'errorbar',
      width = 0.3,
      position = position_dodge(width = 0.65)
   )+
   geom_boxplot(
      fill = '#F8F8FF',
      color = "black",
      outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
      width = 0.7
   )+
   labs(
      title = "Oxigênio Dissolvido no período 1990-2000",
      x="Estação",
      y="mg/L"
   )+
   ggbeeswarm::geom_quasirandom(
      size = 1.2,
      alpha = .25,
      width = .07,
   )+
   scale_y_continuous(
      expand = expansion(mult = c(0,0)),
      n.breaks = 11,
      limits = c(-1,21)
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(
      method = "lm",
      se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
      aes(group = 1),
      alpha = .5,
      na.rm = TRUE,
      size = 1
   )+
   theme_grafs()
)
```

```{r Gráfico OD periodo 2, echo = FALSE, warning=FALSE, message = FALSE}
(od_p2 <-ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA>"2000" &
                             ANO_COLETA<="2010"),
                aes(CODIGO,
                    `Oxigênio dissolvido`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=-Inf,
             ymax=2,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=2,
             ymax=4,
             alpha=1,
             fill="#eb5661")+ #classe 4
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=4,
             ymax=5,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=5,
             ymax=6,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=6,
             ymax=Inf,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Oxigênio Dissolvido no período 2000-2010",
         x="Estação",
         y=NULL)+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_y_continuous(
       expand = expansion(mult = c(0,0)),
       n.breaks = 11,
       limits = c(-1,21))+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(
       method = "lm",
       se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
       aes(group=1),
       alpha=.5,
       na.rm = TRUE,
       size = 1
    )+
    theme_grafs()
)
```

```{r Gráfico OD periodo 3, echo = FALSE, warning=FALSE, message = FALSE}
(od_p3 <-ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA>"2010" &
                             ANO_COLETA<="2020"),
                aes(CODIGO,
                    `Oxigênio dissolvido`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=-Inf,
             ymax=2,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=2,
             ymax=4,
             alpha=1,
             fill="#eb5661")+ #classe 4
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=4,
             ymax=5,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=5,
             ymax=6,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=6,
             ymax=Inf,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Oxigênio Dissolvido no período 2010-2020",
         x=NULL,
         y=NULL)+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    scale_y_continuous(
       expand = expansion(mult = c(0,0)),
       n.breaks = 11,
       limits = c(-1,21))+
    geom_smooth(
       method = "lm",
       se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
       aes(group=1),
       alpha=.5,
       na.rm = TRUE,
       size = 1
    )+
    theme_grafs()
)
```

```{r Gráfico OD 3 periodos juntos, echo = TRUE, warning=FALSE, message = FALSE, fig.cap="Oxigênio Dissolvido no período 1990-2020"}
grid.arrange(od_p1, od_p2, od_p3, ncol = 3)
```

```{r Salvando OD}
ggsave("od_p1.png",
       plot = od_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("od_p2.png",
       plot = od_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("od_p3.png",
       plot = od_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("od_3periodos_2.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(od_p1, od_p2, od_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
```

```{r Gráfico OD_chernobyl, echo = FALSE, warning=FALSE, message = FALSE}
# p1 <- function(plan_wide_19902020, ANO_COLETA) {
#   plan_wide_19902020 %>% 
#     filter(ANO_COLETA > "1990" &
#            ANO_COLETA <= "2000")
# }
# 
# 
# classes_od <- function(plan_wide_19902020, parametro, periodo){
#   ggplot(plan_wide_19902020 %>%
#            periodo),
#   aes(CODIGO,
#       parametro)
# }


# (od_chernobyl <- ggplot(plan_wide_19902020 %>%
#                           p1(ANO_COLETA > "1990" &
#                                ANO_COLETA <= "2000"),
#                         aes(CODIGO,
#                             `Oxigênio dissolvido`))+
#     annotate("rect",
#              xmin=-Inf,
#              xmax=Inf,
#              ymin=-Inf,
#              ymax=2,
#              alpha=1,
#              fill="#ac5079")+ #>pior classe
#     annotate("rect",
#              xmin=-Inf,
#              xmax=Inf,
#              ymin=2,
#              ymax=4,
#              alpha=1,
#              fill="#eb5661")+ #classe 4
#     annotate("rect",
#              xmin=-Inf,
#              xmax=Inf,
#              ymin=4,
#              ymax=5,
#              alpha=1,
#              fill="#fcf7ab")+ #classe 3
#     annotate("rect",
#              xmin=-Inf,
#              xmax=Inf,
#              ymin=5,
#              ymax=6,
#              alpha=1,
#              fill="#70c18c")+ #classe 2
#     annotate("rect",
#              xmin=-Inf,
#              xmax=Inf,
#              ymin=6,
#              ymax=Inf,
#              alpha=1,
#              fill="#8dcdeb")+ #classe 1
#     stat_boxplot(geom = 'errorbar',
#                  width=0.3,
#                  position = position_dodge(width = 0.65))+
#     geom_boxplot(fill='#F8F8FF',
#                  color="black",
#                  outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
#                  width= 0.7)+
#     labs(title = "Oxigênio Dissolvido no período 1990-2000",
#          x="Estação",
#          y="mg/L")+
#     # geom_jitter(width = .07,
#     #             alpha=.15,
#     #             size=1.,
#     #             color="black")+
#     ggbeeswarm::geom_quasirandom(
#       size = 1.2,
#       alpha = .25,
#       width = .07,
#     )+
#     scale_y_continuous(expand = expansion(mult = c(0,0)),
#                        n.breaks = 11,
#                        limits = c(-1,21))+
#     scale_x_discrete(limits = c("87398500",
#                                 "87398980",
#                                 "87398900",
#                                 "87398950",
#                                 "87405500",
#                                 "87406900",
#                                 "87409900"),
#                      labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
#     )+
#     geom_smooth(method = "lm",
#                 se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
#                 aes(group=1),
#                 alpha=.5,
#                 na.rm = TRUE,
#                 size = 1)+
#     # geom_line(
#     #   aes(color="red"),
#     #   alpha=.0)+
#     # scale_color_manual("Legenda",
#     #                    guide="legend",
#     #                    values = c("Classe 1"="#8dcdeb",
#     #                               "Classe 2"="#70c18c",
#     #                               "Classe 3"="#fcf7ab",
#     #                               "Classe 4"="#eb5661",
#     #                               "Pior Classe"="#ac5079"))+
#     # guides(color=guide_legend(override.aes = list(linetype=c(1,1,1,1,1),
#   #                                               lwd=c(2,2,2,2,2),
#   #                                               shape=c(NA,NA,NA,NA,NA),
#   #                                               alpha=1)))+
#   theme(
#     plot.title = element_text(size = 19),
#     axis.title.y = element_text(size = 15),
#     axis.text.y = element_text(size = 17),
#     axis.text.x = element_text(size = 17),
#   )
# )
```

```{r Gráfico IQA OD periodo1, echo = FALSE, message=FALSE, warning=FALSE}
(iqaod_p1 <-ggplot(plan_wide_19902020 %>% 
                      filter(ANO_COLETA > "1990" &
                                ANO_COLETA <= "2000"),
                   aes(CODIGO,
                       IQA_OD, na.rm = TRUE))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=-Inf,
             ymax=19,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=19,
             ymax=36,
             alpha=1,
             fill="#eb5661")+ #classe 4
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=36,
             ymax=51,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=51,
             ymax=79,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=79,
             ymax=Inf,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65),
                 na.rm = TRUE)+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7,
                 na.rm = TRUE)+
    labs(title = "Variação do IQA para o parâmetro Oxigênio Dissolvido 1990-2000",
         x="Estação",
         y="")+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    scale_y_continuous(expand = expansion(mult = c(0,0)),
                       n.breaks = 6,
                       limits = c(-1,101))+
    geom_smooth(
       method = "lm",
       se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
       aes(group=1),
       alpha=.5,
       na.rm = TRUE,
       size = 1
    )+
    theme_grafs()
)
```

```{r Gráfico IQA OD periodo2, echo = FALSE, warning= FALSE, message = FALSE}
(iqaod_p2 <-ggplot(plan_wide_19902020 %>% 
                      filter(ANO_COLETA > "2000" &
                                ANO_COLETA <= "2010"),
                   aes(CODIGO,
                       IQA_OD, na.rm = TRUE))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=-Inf,
             ymax=19,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=19,
             ymax=36,
             alpha=1,
             fill="#eb5661")+ #classe 4
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=36,
             ymax=51,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=51,
             ymax=79,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=79,
             ymax=Inf,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65),
                 na.rm = TRUE)+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7,
                 na.rm = TRUE)+
    labs(title = "Variação do IQA para o parâmetro Oxigênio Dissolvido 2000-2010",
         x="Estação",
         y="")+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_y_continuous(expand = expansion(mult = c(0,0)),
                       n.breaks = 6,
                       limits = c(-1,101))+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(
       method = "lm",
       se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
       aes(group=1),
       alpha=.5,
       na.rm = TRUE,
       size = 1
    )+
    theme_grafs()
)

```

```{r Gráfico IQA OD periodo3, echo = FALSE, warning=FALSE, message = FALSE}
(iqaod_p3 <-ggplot(plan_wide_19902020 %>% 
                      filter(ANO_COLETA > "2010" &
                                ANO_COLETA <= "2020"),
                   aes(CODIGO,
                       IQA_OD, na.rm = TRUE))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=-Inf,
             ymax=19,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=19,
             ymax=36,
             alpha=1,
             fill="#eb5661")+ #classe 4
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=36,
             ymax=51,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=51,
             ymax=79,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=79,
             ymax=Inf,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65),
                 na.rm = TRUE)+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7,
                 na.rm = TRUE)+
    labs(title = "Variação do IQA para o parâmetro Oxigênio Dissolvido 2010-2020",
         x="Estação",
         y="")+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_y_continuous(expand = expansion(mult = c(0,0)),
                       n.breaks = 6,
                       limits = c(-1,101))+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(
       method = "lm",
       se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
       aes(group=1),
       alpha=.5,
       na.rm = TRUE,
       size = 1
    )+
    theme_grafs()
)
```

```{r Gráfico OD_IQA 6 periodos juntos, warning=FALSE, message=FALSE}
grid.arrange(iqaod_p1, iqaod_p2, iqaod_p3, ncol = 3)
```

```{r Sumário OD, echo = FALSE}
(sum_od_p1 <- plan_wide_19902020 %>%
    select(CODIGO, `Oxigênio dissolvido`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"1990" &
              ANO_COLETA<="2000") %>% 
    group_by(CODIGO) %>% 
    summarize(
       min = 
          min(`Oxigênio dissolvido`, na.rm = TRUE),
       q1 = 
          quantile(`Oxigênio dissolvido`, 0.25, na.rm = TRUE),
       median = 
          median(`Oxigênio dissolvido`, na.rm = TRUE),
       mean = 
          mean(`Oxigênio dissolvido`, na.rm= TRUE),
       q3 = 
          quantile(`Oxigênio dissolvido`, 0.75, na.rm = TRUE),
       max = 
          max(`Oxigênio dissolvido`, na.rm = TRUE),
       n = 
          length(`Oxigênio dissolvido`)
    ) %>% 
    pivot_longer(
       !CODIGO,
       names_to = "par",
       values_to = "valor"
    ) %>% 
    pivot_wider(names_from = CODIGO,
                values_from = valor)
)

(sum_od_p2 <- plan_wide_19902020 %>%
      select(CODIGO, `Oxigênio dissolvido`, ANO_COLETA) %>% 
      filter(ANO_COLETA>"2000" &
                ANO_COLETA<="2010") %>% 
      group_by(CODIGO) %>% 
      summarize(
         min = 
            min(`Oxigênio dissolvido`, na.rm = TRUE),
         q1 = 
            quantile(`Oxigênio dissolvido`, 0.25, na.rm = TRUE),
         median = 
            median(`Oxigênio dissolvido`, na.rm = TRUE),
         mean = 
            mean(`Oxigênio dissolvido`, na.rm= TRUE),
         q3 = 
            quantile(`Oxigênio dissolvido`, 0.75, na.rm = TRUE),
         max = 
            max(`Oxigênio dissolvido`, na.rm = TRUE)
      )
)

(sum_od_p3 <- plan_wide_19902020 %>%
      select(CODIGO, `Oxigênio dissolvido`, ANO_COLETA) %>% 
      filter(ANO_COLETA>"2010" &
                ANO_COLETA<="2020") %>% 
      group_by(CODIGO) %>% 
      summarize(
         min = 
            min(`Oxigênio dissolvido`, na.rm = TRUE),
         q1 = 
            quantile(`Oxigênio dissolvido`, 0.25, na.rm = TRUE),
         median = 
            median(`Oxigênio dissolvido`, na.rm = TRUE),
         mean = 
            mean(`Oxigênio dissolvido`, na.rm= TRUE),
         q3 = 
            quantile(`Oxigênio dissolvido`, 0.75, na.rm = TRUE),
         max = 
            max(`Oxigênio dissolvido`, na.rm = TRUE)
      )
)

# sumario_OD3 <- plan_wide_19902020 %>%
#   select(DATA_COLETA, CODIGO, `Oxigênio dissolvido`) %>% 
#   pivot_wider(id_cols = DATA_COLETA,
#               names_from = CODIGO,
#               values_from = plan_wide_19902020$`Oxigênio dissolvido`)
# 
# unique(plan_wide_19902020$CODIGO)

# 
#   pivot_wider(id_cols = CODIGO,
#               names_from = CODIGO,
#               values_from = `Oxigênio dissolvido`)
# 
# 
#   group_by(CODIGO) %>%
#   get_summary_stats(type = "common") %>%
#   pivot_wider(id_cols = variable,
#               names_from = CODIGO,
#               values_from = variable$`Oxigênio dissolvido`)
# 
# # install.packages("ggpubr")
# # library(ggpubr)
```

```{r setup, include=FALSE}
# knitr::opts_chunk$set(echo = TRUE)
```

### Demanda Bioquímica de Oxigênio

```{r Gráfico DBO período1, echo = FALSE, warning = FALSE, message = FALSE}
(dbo_p1<-ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA>"1990" &
                             ANO_COLETA<="2000"),
                aes(CODIGO,
                    DBO))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=10,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=5,
             ymax=10,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=3,
             ymax=5,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=3,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Demanda Bioquímica de Oxigênio no período 1990-2000",
         x="Estação",
         y="mg/L")+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    scale_y_continuous(expand = expansion(mult = c(0.03,0.03)),
                       n.breaks = 8,
                       limits = c(1,100),
                       trans = "log10")+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)
```

```{r Gráfico DBO período2, echo = FALSE, warning = FALSE, message = FALSE}
(dbo_p2<-ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA>"2000" &
                             ANO_COLETA<="2010"),
                aes(CODIGO,
                    DBO))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=10,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=5,
             ymax=10,
             alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=3,
            ymax=5,
            alpha=1,
            fill="#70c18c")+ #classe 2
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=0,
            ymax=3,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Demanda Bioquímica de Oxigênio no período 2000-2010",
        x="Estação",
        y="mg/L")+
   ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    scale_y_continuous(expand = expansion(mult = c(0.03,0.03)),
                       n.breaks = 8,
                       limits = c(1,100),
                       trans = "log10")+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)
```

```{r Gráfico DBO período3, echo = FALSE, warning = FALSE, message = FALSE}
(dbo_p3<-ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA>"2010" &
                             ANO_COLETA<="2020"),
                aes(CODIGO,
                    DBO, na.rm=TRUE))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=10,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=5,
             ymax=10,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=3,
             ymax=5,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=3,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Demanda Bioquímica de Oxigênio no período 2010-2020",
         x="Estação",
         y="mg/L")+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_y_continuous(expand = expansion(mult = c(0.03,0.03)),
                       n.breaks = 8,
                       limits = c(1,100),
                       trans = "log10")+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
        geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)
```

```{r Gráfico IQA DBO periodo1, echo = FALSE, warning = FALSE, message = FALSE}
(iqa_dbo1<-ggplot(plan_wide_19902020 %>% 
                    filter(ANO_COLETA>"1990" &
                             ANO_COLETA<="2000"),
                  aes(CODIGO,
                      IQA_DBO))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=19,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=19,
            ymax=36,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=36,
            ymax=51,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=51,
            ymax=79,
            alpha=1,
            fill="#70c18c")+ #classe 2
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=79,
            ymax=Inf,
            alpha=1,
            fill="#8dcdeb")+ #classe 1))
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65),
                na.rm = TRUE)+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Variação do IQA para o parâmetro DBO 1990-2020",
        x="Estação",
        y="mg/L")+
   ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_y_continuous(expand = expansion(mult = c(0,0)),
                      n.breaks = 6,
                      limits = c(-1,101))+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)
```

```{r Gráfico IQA DBO periodo2, echo = FALSE, warning = FALSE, message = FALSE}
(iqa_dbo2<-ggplot(plan_wide_19902020%>% 
                     filter(ANO_COLETA>"2000" &
                               ANO_COLETA<="2010"),
                  aes(CODIGO,
                      IQA_DBO))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=-Inf,
             ymax=19,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=19,
             ymax=36,
             alpha=1,
             fill="#eb5661")+ #classe 4
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=36,
             ymax=51,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=51,
             ymax=79,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=79,
             ymax=Inf,
             alpha=1,
             fill="#8dcdeb")+ #classe 1))
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65),
                 na.rm = TRUE)+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Variação do IQA para o parâmetro DBO 2000-2010",
         x="Estação",
         y="mg/L")+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_y_continuous(expand = expansion(mult = c(0,0)),
                       n.breaks = 6,
                       limits = c(-1,101))+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)
```

```{r Gráfico IQA DBO periodo3, echo = FALSE, warning = FALSE, message = FALSE}
(iqa_dbo3<-ggplot(plan_wide_19902020%>% 
                     filter(ANO_COLETA>"2010" &
                               ANO_COLETA<="2020"),
                  aes(CODIGO,
                      IQA_DBO))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=-Inf,
             ymax=19,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=19,
             ymax=36,
             alpha=1,
             fill="#eb5661")+ #classe 4
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=36,
             ymax=51,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=51,
             ymax=79,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=79,
             ymax=Inf,
             alpha=1,
             fill="#8dcdeb")+ #classe 1))
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65),
                 na.rm = TRUE)+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Variação do IQA para o parâmetro DBO 2010-2020",
         x="Estação",
         y="mg/L")+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_y_continuous(expand = expansion(mult = c(0,0)),
                       n.breaks = 6,
                       limits = c(-1,101))+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
        geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)
```

```{r Gráfico DBO 3 periodos juntos, warning=FALSE, message=FALSE}
grid.arrange(dbo_p1, dbo_p2, dbo_p3, ncol = 3)
```

```{r Sumário DBO}
(sum_dbo_p1 <- plan_wide_19902020 %>%
   select(CODIGO, DBO, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(DBO, 
           na.rm = TRUE),
     q1 = 
       quantile(DBO, 0.25, 
                na.rm = TRUE),
     median = 
       median(DBO, 
              na.rm = TRUE),
     mean = 
       mean(DBO, 
            na.rm= TRUE),
     q3 = 
       quantile(DBO, 0.75, 
                na.rm = TRUE),
     max = 
       max(DBO, 
           na.rm = TRUE))
)

(sum_dbo_p2 <- plan_wide_19902020 %>%
    select(CODIGO, DBO, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(DBO, 
            na.rm = TRUE),
      q1 = 
        quantile(DBO, 0.25, 
                 na.rm = TRUE),
      median = 
        median(DBO, 
               na.rm = TRUE),
      mean = 
        mean(DBO, 
             na.rm= TRUE),
      q3 = 
        quantile(DBO, 0.75, 
                 na.rm = TRUE),
      max = 
        max(DBO, 
            na.rm = TRUE))
)

(sum_dbo_p3 <- plan_wide_19902020 %>%
    select(CODIGO, DBO, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(DBO, 
            na.rm = TRUE),
      q1 = 
        quantile(DBO, 0.25, 
                 na.rm = TRUE),
      median = 
        median(DBO, 
               na.rm = TRUE),
      mean = 
        mean(DBO, 
             na.rm= TRUE),
      q3 = 
        quantile(DBO, 0.75, 
                 na.rm = TRUE),
      max = 
        max(DBO, 
            na.rm = TRUE))
)
```

```{r Salvando DBO}
ggsave("dbo_p1.png",
       plot = dbo_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")


ggsave("dbo_p2.png",
       plot = dbo_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("dbo_p3.png",
       plot = dbo_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("dbo_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(dbo_p1, dbo_p2, dbo_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
```

### Fósforo total

```{r Gráfico Fósforo total periodo1, warning = FALSE, message = FALSE}
(ptot_p1<-ggplot(plan_wide_19902020%>% 
                   filter(ANO_COLETA>"1990" &
                             ANO_COLETA<="2000"),
                 aes(CODIGO,
                     `Fósforo total`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.15,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.1,
             ymax=0.15,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=0.1,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Fósforo total no período 1990-2000",
         x="Estação",
         y="mg/L")+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_y_continuous(expand = expansion(mult = c(0.03,0.03)),
                       n.breaks = 8,
                       limits = c(min(plan_wide_19902020$`Fósforo total`, na.rm = TRUE),
                                  max(plan_wide_19902020$`Fósforo total`), na.rm = TRUE),
                       trans = "log10")+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)

```

```{r Gráfico Fósforo total periodo2, warning = FALSE, message = FALSE}
(ptot_p2 <- ggplot(plan_wide_19902020%>% 
                      filter(ANO_COLETA>"2000" &
                                ANO_COLETA<="2010"),
                   aes(CODIGO,
                       `Fósforo total`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.15,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.1,
             ymax=0.15,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=0.1,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Fósforo total no período 2000-2010",
         x="Estação",
         y="mg/L")+
   scale_y_continuous(expand = expansion(mult = c(0.03,0.03)),
                      n.breaks = 8,
                      limits = c(min(plan_wide_19902020$`Fósforo total`, na.rm = TRUE),
                                 max(plan_wide_19902020$`Fósforo total`), na.rm = TRUE),
                      trans = "log10")+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)

```

```{r Gráfico Fósforo total periodo3, warning = FALSE, message = FALSE}
(ptot_p3 <- ggplot(plan_wide_19902020%>% 
                      filter(ANO_COLETA>"2010" &
                                ANO_COLETA<="2020"),
                   aes(CODIGO,
                       `Fósforo total`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.15,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0.1,
             ymax=0.15,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=0.1,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Fósforo total no período 2010-2020",
         x="Estação",
         y="mg/L")+
    scale_y_continuous(expand = expansion(mult = c(0.03,0.03)),
                       n.breaks = 8,
                       limits = c(min(plan_wide_19902020$`Fósforo total`, na.rm = TRUE),
                                  max(plan_wide_19902020$`Fósforo total`), na.rm = TRUE),
                       trans = "log10")+
   ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)

```

```{r Gráfico Ptot 3 periodos juntos, warning=FALSE, message=FALSE}
grid.arrange(ptot_p1, ptot_p2, ptot_p3, ncol = 3)
```

```{r Sumário Fósforo total}
(sum_ptot_p1 <- plan_wide_19902020 %>%
    select(CODIGO, `Fósforo total`, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(`Fósforo total`, na.rm = TRUE),
     q1 = 
       quantile(`Fósforo total`, 0.25, na.rm = TRUE),
     median = 
       median(`Fósforo total`, na.rm = TRUE),
     mean = 
       mean(`Fósforo total`, na.rm= TRUE),
     q3 = 
       quantile(`Fósforo total`, 0.75, na.rm = TRUE),
     max = 
       max(`Fósforo total`, na.rm = TRUE)))

(sum_ptot_p2 <- plan_wide_19902020 %>%
    select(CODIGO, `Fósforo total`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Fósforo total`, na.rm = TRUE),
      q1 = 
        quantile(`Fósforo total`, 0.25, na.rm = TRUE),
      median = 
        median(`Fósforo total`, na.rm = TRUE),
      mean = 
        mean(`Fósforo total`, na.rm= TRUE),
      q3 = 
        quantile(`Fósforo total`, 0.75, na.rm = TRUE),
      max = 
        max(`Fósforo total`, na.rm = TRUE)))

(sum_ptot_p3 <- plan_wide_19902020 %>%
    select(CODIGO, `Fósforo total`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Fósforo total`, na.rm = TRUE),
      q1 = 
        quantile(`Fósforo total`, 0.25, na.rm = TRUE),
      median = 
        median(`Fósforo total`, na.rm = TRUE),
      mean = 
        mean(`Fósforo total`, na.rm= TRUE),
      q3 = 
        quantile(`Fósforo total`, 0.75, na.rm = TRUE),
      max = 
        max(`Fósforo total`, na.rm = TRUE)))

```

```{r Salvando Ptot}
ggsave("ptot_p1.png",
       plot = ptot_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("ptot_p2.png",
       plot = ptot_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("ptot_p3.png",
       plot = ptot_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("ptot_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(ptot_p1, ptot_p2, ptot_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
```

### Escherichia coli

```{r Gráfico Ecoli periodo1, warning = FALSE, message = FALSE}
(ecoli_p1 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"1990" &
                                 ANO_COLETA<="2000"),
                    aes(CODIGO,
                        `Escherichia coli`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=3200,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=800,
             ymax=3200,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=160,
             ymax=800,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=160,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Escherichia coli no período 1990-2000",
         x="Estação",
         y="NMP/100mL")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                      n.breaks = 9,
                      limits = c(min(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE),
                                 max(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)
```

```{r Gráfico Ecoli periodo2, warning = FALSE, message = FALSE}
(ecoli_p2 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"2000" &
                                 ANO_COLETA<="2010"),
                    aes(CODIGO,
                        `Escherichia coli`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=3200,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=800,
             ymax=3200,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=160,
             ymax=800,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=160,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Escherichia coli no período 2000-2010",
         x="Estação",
         y="NMP/100mL")+
    scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                       n.breaks = 9,
                       limits = c(min(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE),
                                  max(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE)),
                       trans = "log10",
                       labels = scales::number_format(accuracy = 1,
                                                      decimal.mark = ",",
                                                      big.mark = " "))+
   ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)
```

```{r Gráfico Ecoli periodo3, warning = FALSE, message = FALSE}
(ecoli_p3 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"2010" &
                                 ANO_COLETA<="2020"),
                    aes(CODIGO,
                        `Escherichia coli`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=3200,
             ymax=Inf,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=800,
             ymax=3200,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=160,
             ymax=800,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=160,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Escherichia coli no período 2010-2020",
         x="Estação",
         y="NMP/100mL")+
    scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                       n.breaks = 9,
                       limits = c(min(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE),
                                  max(plan_wide_19902020$`Escherichia coli`, na.rm = TRUE)),
                       trans = "log10",
                       labels = scales::number_format(accuracy = 1,
                                                      decimal.mark = ",",
                                                      big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)
```

```{r Gráfico ecoli 3 periodos juntos, warning=FALSE, message=FALSE}
grid.arrange(ecoli_p1, ecoli_p2, ecoli_p3, ncol = 3)
```

```{r Sumário Ecoli}
(sum_ecoli_p1 <- plan_wide_19902020 %>%
    select(CODIGO, `Escherichia coli`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"1990" &
              ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(`Escherichia coli`, 
           na.rm = TRUE),
     q1 = 
       quantile(`Escherichia coli`, 0.25, 
                na.rm = TRUE),
     median = 
       median(`Escherichia coli`, 
              na.rm = TRUE),
     mean = 
       mean(`Escherichia coli`, 
            na.rm= TRUE),
     q3 = 
       quantile(`Escherichia coli`, 0.75, 
                na.rm = TRUE),
     max = 
       max(`Escherichia coli`, 
           na.rm = TRUE))
)

(sum_ecoli_p2 <- plan_wide_19902020 %>%
    select(CODIGO, `Escherichia coli`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Escherichia coli`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Escherichia coli`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Escherichia coli`, 
               na.rm = TRUE),
      mean = 
        mean(`Escherichia coli`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Escherichia coli`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Escherichia coli`, 
            na.rm = TRUE))
)

(sum_ecoli_p3 <- plan_wide_19902020 %>%
    select(CODIGO, `Escherichia coli`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Escherichia coli`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Escherichia coli`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Escherichia coli`, 
               na.rm = TRUE),
      mean = 
        mean(`Escherichia coli`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Escherichia coli`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Escherichia coli`, 
            na.rm = TRUE))
)
```

```{r Salvando ecoli}
ggsave("ecoli_p1.png",
       plot = ecoli_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("ecoli_p2.png",
       plot = ecoli_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("ecoli_p3.png",
       plot = ecoli_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("ecoli_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(ecoli_p1, ecoli_p2, ecoli_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
```

### Nitrogênio amoniacal

```{r Gráfico Nitrogênio total periodo1, warning = FALSE, message = FALSE}
(namon_p1 <- ggplot(plan_wide_19902020 %>% 
                      filter(ANO_COLETA>"1990" &
                               ANO_COLETA<="2000"),
                    aes(CODIGO,
                        `Nitrogênio total`))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=13.3,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=3.7,
             ymax=13.3,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=0,
             ymax=3.7,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
   labs(title = "Nitrogênio amoniacal no período 1990-2000",
        x="Estação",
        y="mg/L")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 9,
                      limits = c(min(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE),
                                 max(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = .001,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico Nitrogênio total periodo2, warning = FALSE, message = FALSE}
(namon_p2 <- ggplot(plan_wide_19902020 %>% 
                      filter(ANO_COLETA>"2000" &
                               ANO_COLETA<="2010"),
                    aes(CODIGO,
                        `Nitrogênio total`))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=13.3,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=3.7,
            ymax=13.3,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=0,
            ymax=3.7,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Nitrogênio amoniacal no período 2000-2010",
        x="Estação",
        y="mg/L")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 9,
                      limits = c(min(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE),
                                 max(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = .001,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico Nitrogênio total periodo3, warning = FALSE, message = FALSE}
(namon_p3 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"2010" &
                                 ANO_COLETA<="2020"),
                    aes(CODIGO,
                        `Nitrogênio total`))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=13.3,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=3.7,
            ymax=13.3,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=0,
            ymax=3.7,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Nitrogênio amoniacal no período 2010-2020",
        x="Estação",
        y="mg/L")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 9,
                      limits = c(min(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE),
                                 max(plan_wide_19902020$`Nitrogênio total`, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = .001,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico Namon 3 periodos juntos, warning=FALSE, message=FALSE}
grid.arrange(namon_p1, namon_p2, namon_p3, ncol = 3)
```

```{r Sumário Nitrogênio total}
(sum_namon_p1 <- plan_wide_19902020 %>%
   select(CODIGO, `Nitrogênio total`, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(`Nitrogênio total`, 
           na.rm = TRUE),
     q1 = 
       quantile(`Nitrogênio total`, 0.25, 
                na.rm = TRUE),
     median = 
       median(`Nitrogênio total`, 
              na.rm = TRUE),
     mean = 
       mean(`Nitrogênio total`, 
            na.rm= TRUE),
     q3 = 
       quantile(`Nitrogênio total`, 0.75, 
                na.rm = TRUE),
     max = 
       max(`Nitrogênio total`, 
           na.rm = TRUE))
)

(sum_namon_p2 <- plan_wide_19902020 %>%
    select(CODIGO, `Nitrogênio total`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Nitrogênio total`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Nitrogênio total`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Nitrogênio total`, 
               na.rm = TRUE),
      mean = 
        mean(`Nitrogênio total`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Nitrogênio total`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Nitrogênio total`, 
            na.rm = TRUE))
)

(sum_namon_p3 <- plan_wide_19902020 %>%
    select(CODIGO, `Nitrogênio total`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Nitrogênio total`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Nitrogênio total`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Nitrogênio total`, 
               na.rm = TRUE),
      mean = 
        mean(`Nitrogênio total`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Nitrogênio total`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Nitrogênio total`, 
            na.rm = TRUE))
)
```

```{r Salvando namon}
ggsave("namon_p1.png",
       plot = namon_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("namon_p2.png",
       plot = namon_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("namon_p3.png",
       plot = namon_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("namon_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(namon_p1, namon_p2, namon_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
```

### Turbidez

```{r Gráfico Turbidez periodo1, warning = FALSE, message = FALSE}
(turb_p1 <- ggplot(plan_wide_19902020 %>% 
                     filter(ANO_COLETA>"1990" &
                              ANO_COLETA<="2000"),
                   aes(CODIGO,
                       Turbidez))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=100,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=40,
            ymax=100,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=0,
            ymax=40,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Turbidez no período 1990-2000",
        x="Estação",
        y="UNT")+
   scale_y_continuous(expand = expansion(mult = c(0.05, 0.03)),
                      n.breaks = 8,
                      limits = c(min(plan_wide_19902020$Turbidez, na.rm = TRUE),
                                 max(plan_wide_19902020$Turbidez, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico Turbidez periodo2, warning = FALSE, message = FALSE}
(turb_p2 <- ggplot(plan_wide_19902020 %>% 
                     filter(ANO_COLETA>"2000" &
                              ANO_COLETA<="2010"),
                   aes(CODIGO,
                       Turbidez))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=100,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=40,
            ymax=100,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=0,
            ymax=40,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Turbidez no período 2000-2010",
        x="Estação",
        y="UNT")+
   scale_y_continuous(expand = expansion(mult = c(0.05, 0.03)),
                      n.breaks = 8,
                      limits = c(min(plan_wide_19902020$Turbidez, na.rm = TRUE),
                                 max(plan_wide_19902020$Turbidez, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico Turbidez periodo3, warning = FALSE, message = FALSE}
(turb_p3 <- ggplot(plan_wide_19902020 %>% 
                     filter(ANO_COLETA>"2010" &
                              ANO_COLETA<="2020"),
                   aes(CODIGO,
                       Turbidez))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=100,
            ymax=Inf,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=40,
            ymax=100,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=0,
            ymax=40,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Turbidez no período 2010-2020",
        x="Estação",
        y="UNT")+
   scale_y_continuous(expand = expansion(mult = c(0.05, 0.03)),
                      n.breaks = 8,
                      limits = c(min(plan_wide_19902020$Turbidez, na.rm = TRUE),
                                 max(plan_wide_19902020$Turbidez, na.rm = TRUE)),
                      trans = "log10",
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico turb 3 periodos juntos, warning=FALSE, message=FALSE}
grid.arrange(turb_p1, turb_p2, turb_p3, ncol = 3)
```

```{r Sumário Turbidez}
(sum_turb_p1 <- plan_wide_19902020 %>%
   select(CODIGO, Turbidez, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(Turbidez, 
           na.rm = TRUE),
     q1 = 
       quantile(Turbidez, 0.25, 
                na.rm = TRUE),
     median = 
       median(Turbidez, 
              na.rm = TRUE),
     mean = 
       mean(Turbidez, 
            na.rm= TRUE),
     q3 = 
       quantile(Turbidez, 0.75, 
                na.rm = TRUE),
     max = 
       max(Turbidez, 
           na.rm = TRUE))
)

(sum_turb_p2 <- plan_wide_19902020 %>%
    select(CODIGO, Turbidez, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(Turbidez, 
            na.rm = TRUE),
      q1 = 
        quantile(Turbidez, 0.25, 
                 na.rm = TRUE),
      median = 
        median(Turbidez, 
               na.rm = TRUE),
      mean = 
        mean(Turbidez, 
             na.rm= TRUE),
      q3 = 
        quantile(Turbidez, 0.75, 
                 na.rm = TRUE),
      max = 
        max(Turbidez, 
            na.rm = TRUE))
)

(sum_turb_p3 <- plan_wide_19902020 %>%
    select(CODIGO, Turbidez, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(Turbidez, 
            na.rm = TRUE),
      q1 = 
        quantile(Turbidez, 0.25, 
                 na.rm = TRUE),
      median = 
        median(Turbidez, 
               na.rm = TRUE),
      mean = 
        mean(Turbidez, 
             na.rm= TRUE),
      q3 = 
        quantile(Turbidez, 0.75, 
                 na.rm = TRUE),
      max = 
        max(Turbidez, 
            na.rm = TRUE))
) 
```

```{r Salvando turb}
ggsave("turb_p1.png",
       plot = turb_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("turb_p2.png",
       plot = turb_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("turb_p3.png",
       plot = turb_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("turb_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(turb_p1, turb_p2, turb_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
```

### pH

```{r Gráfico pH periodo1, warning = FALSE, message = FALSE}
(pH_p1 <- ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA>"1990" &
                            ANO_COLETA<="2000"),
                 aes(CODIGO,
                     pH))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=6,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=9,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=6,
            ymax=9,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "pH no período 1990-2000",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                      n.breaks = 8,
                      limits = c(4,11),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico pH periodo2, warning = FALSE, message = FALSE}
(pH_p2 <- ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA>"2000" &
                            ANO_COLETA<="2010"),
                 aes(CODIGO,
                     pH))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=6,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=9,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=6,
            ymax=9,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "pH no período 2000-2010",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                      n.breaks = 8,
                      limits = c(4,11),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico pH periodo3, warning = FALSE, message = FALSE}
(pH_p3 <- ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA>"2010" &
                            ANO_COLETA<="2020"),
                 aes(CODIGO,
                     pH))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=6,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=9,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=6,
            ymax=9,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "pH no período 2010-2020",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.01)),
                      n.breaks = 8,
                      limits = c(4,11),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico pH 3 periodos juntos, warning=FALSE, message=FALSE}
grid.arrange(pH_p1, pH_p2, pH_p3, ncol = 3)
```

```{r Sumário pH}
(sum_pH_p1 <- plan_wide_19902020 %>%
   select(CODIGO, pH, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(pH, 
           na.rm = TRUE),
     q1 = 
       quantile(pH, 0.25, 
                na.rm = TRUE),
     median = 
       median(pH, 
              na.rm = TRUE),
     mean = 
       mean(pH, 
            na.rm= TRUE),
     q3 = 
       quantile(pH, 0.75, 
                na.rm = TRUE),
     max = 
       max(pH, 
           na.rm = TRUE))
)

(sum_pH_p2 <- plan_wide_19902020 %>%
    select(CODIGO, pH, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(pH, 
            na.rm = TRUE),
      q1 = 
        quantile(pH, 0.25, 
                 na.rm = TRUE),
      median = 
        median(pH, 
               na.rm = TRUE),
      mean = 
        mean(pH, 
             na.rm= TRUE),
      q3 = 
        quantile(pH, 0.75, 
                 na.rm = TRUE),
      max = 
        max(pH, 
            na.rm = TRUE))
) 

(sum_pH_p3 <- plan_wide_19902020 %>%
    select(CODIGO, pH, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(pH, 
            na.rm = TRUE),
      q1 = 
        quantile(pH, 0.25, 
                 na.rm = TRUE),
      median = 
        median(pH, 
               na.rm = TRUE),
      mean = 
        mean(pH, 
             na.rm= TRUE),
      q3 = 
        quantile(pH, 0.75, 
                 na.rm = TRUE),
      max = 
        max(pH, 
            na.rm = TRUE))
)
```

```{r Salvando pH}
ggsave("pH_p1.png",
       plot = pH_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("pH_p2.png",
       plot = pH_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("pH_p3.png",
       plot = pH_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("pH_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(pH_p1, pH_p2, pH_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
```

### Sólidos totais

```{r Gráfico SólTot periodo1, warning = FALSE, message = FALSE}
(SolTot_p1 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"1990" &
                                ANO_COLETA<="2000"),
                     aes(CODIGO,
                         `Sólidos totais`))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=500,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=500,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Sólidos totais no período 1990-2000",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$`Sólidos totais`, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico SólTot periodo2, warning = FALSE, message = FALSE}
(SolTot_p2 <- ggplot(plan_wide_19902020 %>% 
                       filter(ANO_COLETA>"2000" &
                                ANO_COLETA<="2010"),
                     aes(CODIGO,
                         `Sólidos totais`))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=500,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=500,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Sólidos totais no período 2000-2010",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$`Sólidos totais`, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
       size = 1.2,
       alpha = .25,
       width = .07,
    )+
    scale_x_discrete(limits = c("87398500", 
                                "87398980", 
                                "87398900", 
                                "87398950", 
                                "87405500", 
                                "87406900", 
                                "87409900"),
                     labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
    )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
    theme_grafs()
)
```

```{r Gráfico SólTot periodo3, warning = FALSE, message = FALSE}
(SolTot_p3 <- ggplot(plan_wide_19902020 %>% 
                        filter(ANO_COLETA>"2010" &
                                  ANO_COLETA<="2020"),
                     aes(CODIGO,
                         `Sólidos totais`))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=500,
             ymax=Inf,
             alpha=1,
             fill="#eb5661")+ #classe 4
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=-Inf,
             ymax=500,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65))+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7)+
    labs(title = "Sólidos totais no período 2010-2020",
         x="Estação",
         y="")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$`Sólidos totais`, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
   ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico SólTot 3 periodos juntos, warning=FALSE, message=FALSE}
grid.arrange(SolTot_p1, SolTot_p2, SolTot_p3, ncol = 3)
```

```{r Sumário Sólidos Totais}
(sum_SolTot_p1 <- plan_wide_19902020 %>%
   select(CODIGO, `Sólidos totais`, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(`Sólidos totais`, 
           na.rm = TRUE),
     q1 = 
       quantile(`Sólidos totais`, 0.25, 
                na.rm = TRUE),
     median = 
       median(`Sólidos totais`, 
              na.rm = TRUE),
     mean = 
       mean(`Sólidos totais`, 
            na.rm= TRUE),
     q3 = 
       quantile(`Sólidos totais`, 0.75, 
                na.rm = TRUE),
     max = 
       max(`Sólidos totais`, 
           na.rm = TRUE))
)

(sum_SolTot_p2 <- plan_wide_19902020 %>%
    select(CODIGO, `Sólidos totais`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Sólidos totais`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Sólidos totais`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Sólidos totais`, 
               na.rm = TRUE),
      mean = 
        mean(`Sólidos totais`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Sólidos totais`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Sólidos totais`, 
            na.rm = TRUE))
)

(sum_SolTot_p3 <- plan_wide_19902020 %>%
    select(CODIGO, `Sólidos totais`, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(`Sólidos totais`, 
            na.rm = TRUE),
      q1 = 
        quantile(`Sólidos totais`, 0.25, 
                 na.rm = TRUE),
      median = 
        median(`Sólidos totais`, 
               na.rm = TRUE),
      mean = 
        mean(`Sólidos totais`, 
             na.rm= TRUE),
      q3 = 
        quantile(`Sólidos totais`, 0.75, 
                 na.rm = TRUE),
      max = 
        max(`Sólidos totais`, 
            na.rm = TRUE))
)
```

```{r Salvando SolTot}
ggsave("SolTot_p1.png",
       plot = SolTot_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("SolTot_p2.png",
       plot = SolTot_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("SolTot_p3.png",
       plot = SolTot_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("SolTot_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(SolTot_p1, SolTot_p2, SolTot_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
```

### IQA

```{r Gráfico IQA periodo1, echo = FALSE, message=FALSE, warning=FALSE}
(iqa_p1 <-ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA > "1990" &
                            ANO_COLETA <= "2000"),
                 aes(CODIGO,
                     IQA, na.rm = TRUE))+
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=-Inf,
             ymax=19,
             alpha=1,
             fill="#ac5079")+ #>pior classe
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=19,
             ymax=36,
             alpha=1,
             fill="#eb5661")+ #classe 4
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=36,
             ymax=51,
             alpha=1,
             fill="#fcf7ab")+ #classe 3
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=51,
             ymax=79,
             alpha=1,
             fill="#70c18c")+ #classe 2
    annotate("rect",
             xmin=-Inf,
             xmax=Inf,
             ymin=79,
             ymax=Inf,
             alpha=1,
             fill="#8dcdeb")+ #classe 1
    stat_boxplot(geom = 'errorbar',
                 width=0.3,
                 position = position_dodge(width = 0.65),
                 na.rm = TRUE)+
    geom_boxplot(fill='#F8F8FF',
                 color="black",
                 outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                 width= 0.7,
                 na.rm = TRUE)+
    labs(title = "Variação do IQA no período 1990-2000",
         x="Estação",
         y="")+
    scale_y_continuous(expand = expansion(mult = c(0,0)),
                       n.breaks = 6,
                       limits = c(-1,101))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
    geom_smooth(method = "lm",
                se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
                aes(group=1),
                alpha=.5,
                na.rm = TRUE,
                size = 1)+
   theme_grafs()+
   theme(axis.title.y = element_blank())
)
```

```{r Gráfico IQA periodo2, echo = FALSE, message=FALSE, warning=FALSE}
(iqa_p2 <-ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA > "2000" &
                            ANO_COLETA <= "2010"),
                 aes(CODIGO,
                     IQA, na.rm = TRUE))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=19,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=19,
            ymax=36,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=36,
            ymax=51,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=51,
            ymax=79,
            alpha=1,
            fill="#70c18c")+ #classe 2
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=79,
            ymax=Inf,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65),
                na.rm = TRUE)+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7,
                na.rm = TRUE)+
   labs(title = "Variação do IQA no período 2000-2010",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0,0)),
                      n.breaks = 6,
                      limits = c(-1,101))+
   ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
 theme_grafs()+
   theme(axis.title.y = element_blank()
   )
)
```

```{r Gráfico IQA periodo3, echo = FALSE, message=FALSE, warning=FALSE}
(iqa_p3 <-ggplot(plan_wide_19902020 %>% 
                   filter(ANO_COLETA > "2010" &
                            ANO_COLETA <= "2020"),
                 aes(CODIGO,
                     IQA, na.rm = TRUE))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=19,
            alpha=1,
            fill="#ac5079")+ #>pior classe
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=19,
            ymax=36,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=36,
            ymax=51,
            alpha=1,
            fill="#fcf7ab")+ #classe 3
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=51,
            ymax=79,
            alpha=1,
            fill="#70c18c")+ #classe 2
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=79,
            ymax=Inf,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65),
                na.rm = TRUE)+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7,
                na.rm = TRUE)+
   labs(title = "Variação do IQA no período 2010-2020",
        x="Estação",
        y="")+
   scale_y_continuous(expand = expansion(mult = c(0,0)),
                      n.breaks = 6,
                      limits = c(-1,101))+
   ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
    theme_grafs()+
    theme(axis.title.y = element_blank())
)
```

```{r Gráfico IQA 3 periodos juntos, warning=FALSE, message=FALSE}
grid.arrange(iqa_p1, iqa_p2, iqa_p3, ncol = 3)
```

```{r Sumário IQA}
(sum_IQA_p1 <- plan_wide_19902020 %>%
   select(CODIGO, IQA, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(IQA, 
           na.rm = TRUE),
     q1 = 
       quantile(IQA, 0.25, 
                na.rm = TRUE),
     median = 
       median(IQA, 
              na.rm = TRUE),
     mean = 
       mean(IQA, 
            na.rm= TRUE),
     q3 = 
       quantile(IQA, 0.75, 
                na.rm = TRUE),
     max = 
       max(IQA, 
           na.rm = TRUE))
)

(sum_IQA_p2 <- plan_wide_19902020 %>%
    select(CODIGO, IQA, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(IQA, 
            na.rm = TRUE),
      q1 = 
        quantile(IQA, 0.25, 
                 na.rm = TRUE),
      median = 
        median(IQA, 
               na.rm = TRUE),
      mean = 
        mean(IQA, 
             na.rm= TRUE),
      q3 = 
        quantile(IQA, 0.75, 
                 na.rm = TRUE),
      max = 
        max(IQA, 
            na.rm = TRUE))
)

(sum_IQA_p3 <- plan_wide_19902020 %>%
    select(CODIGO, IQA, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(IQA, 
            na.rm = TRUE),
      q1 = 
        quantile(IQA, 0.25, 
                 na.rm = TRUE),
      median = 
        median(IQA, 
               na.rm = TRUE),
      mean = 
        mean(IQA, 
             na.rm= TRUE),
      q3 = 
        quantile(IQA, 0.75, 
                 na.rm = TRUE),
      max = 
        max(IQA, 
            na.rm = TRUE),
      n = 
        length(IQA))
)

```

```{r Salvando iqa}
ggsave("iqa_p1.png",
       plot = iqa_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("iqa_p2.png",
       plot = iqa_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("iqa_p3.png",
       plot = iqa_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("iqa_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(iqa_p1, iqa_p2, iqa_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")
```

## Testando coisas

```{r Testando coisas, include = FALSE}
# plan_wide_19902020 %>% 
#    select(CODIGO, `Oxigênio dissolvido`, ANO_COLETA) %>% 
#    ggplot(aes(ANO_COLETA, `Oxigênio dissolvido`, 
#       col = CODIGO))+
#    geom_line()+
#    facet_wrap(~ CODIGO, ncol = 7)

# df111 <- data.frame(x = c(1:100))
# glimpse(df111)
# df111$y <- 2 + 3 * df111$x + rnorm(100, sd = 40)
# 
# lm_eqn <- function(df111){
#     m <- lm(y ~ x, df111);
#     eq <- substitute(y == a + b %.% x*","~~r^2~"="~r2,
#          list(a = format(unname(coef(m)[1]), digits = 2),
#               b = format(unname(coef(m)[2]), digits = 2),
#              r2 = format(summary(m)$r.squared, digits = 3)))
#     as.character(as.expression(eq));
# } 
# p2 <- p111 +
#   geom_text(x = 25, y = 300,
#             label = lm_eqn(df111),
#             parse = TRUE)
# p2
# 
# 
# lm_eqc <- function(plan_wide_19902020){
#    m <- lm(`Oxigênio dissolvido` ~ CODIGO, plan_wide_19902020);
#    eq <- substitute(y == a + b %.% x*","~~r^2~"="~r2,
#                     list(a = format(unname(coef(m)[1]), digits = 2),
#                          b = format(unname(coef(m)[2]), digits = 2),
#                          r2 = format(summary(m)$r.squared, digits = 3)))
#    as.character(as.expression(eq));
# }
# 
# (od_p1 <-ggplot(plan_wide_19902020 %>%
#                    filter(ANO_COLETA>"1990" &
#                              ANO_COLETA<="2000"),
#                 aes(CODIGO,
#                     `Oxigênio dissolvido`))+
#       annotate("rect",
#                xmin=-Inf,
#                xmax=Inf,
#                ymin=-Inf,
#                ymax=2,
#                alpha=1,
#                fill="#ac5079")+ #>pior classe
#       annotate("rect",
#                xmin=-Inf,
#                xmax=Inf,
#                ymin=2,
#                ymax=4,
#                alpha=1,
#                fill="#eb5661")+ #classe 4
#       annotate("rect",
#                xmin=-Inf,
#                xmax=Inf,
#                ymin=4,
#                ymax=5,
#                alpha=1,
#                fill="#fcf7ab")+ #classe 3
#       annotate("rect",
#                xmin=-Inf,
#                xmax=Inf,
#                ymin=5,
#                ymax=6,
#                alpha=1,
#                fill="#70c18c")+ #classe 2
#       annotate("rect",
#                xmin=-Inf,
#                xmax=Inf,
#                ymin=6,
#                ymax=Inf,
#                alpha=1,
#                fill="#8dcdeb")+ #classe 1
#       stat_boxplot(geom = 'errorbar',
#                    width=0.3,
#                    position = position_dodge(width = 0.65))+
#       geom_boxplot(fill='#F8F8FF',
#                    color="black",
#                    outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
#                    width= 0.7)+
#       labs(title = "Oxigênio Dissolvido no período 1990-2000",
#            x="Estação",
#            y="mg/L")+
#       # geom_jitter(width = .05,
#       #             alpha=.2,
#       #             size=1.5,
#       #             color="black")+
#       scale_y_continuous(expand = expansion(mult = c(0,0)),
#                          n.breaks = 11,
#                          limits = c(-1,21))+
#       scale_x_discrete(limits = c("87398500", "87398980", "87398900", "87398950", "87405500", "87406900", "87409900"))+
#       geom_smooth(method = "lm",
#                   se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
#                   aes(group=1),
#                   alpha=.5,
#                   na.rm = TRUE,
#                   size = 1)+
#       # annotate(geom_text(aes(x = "87405500", y = 15),
#       #                    label = lm_eqc(plan_wide_19902020),
#       #                    parse = TRUE,
#       #                    inherit.aes = TRUE,
#       #                    check_overlap = TRUE))+
#       #  geom_line(
#       #     aes(color="red"),
#       #     alpha=.0,
#       # )+
#       # scale_color_manual("Legenda",
#       #                    guide="legend",
#       #                    values = c("Classe 1"="#8dcdeb",
#       #                               "Classe 2"="#70c18c",
#       #                               "Classe 3"="#fcf7ab",
#       #                               "Classe 4"="#eb5661",
#       #                               "Pior Classe"="#ac5079"))+
#    # guides(color=guide_legend(override.aes = list(linetype=c(1,1,1,1,1),
#    #                                               lwd=c(2,2,2,2,2),
#    #                                               shape=c(NA,NA,NA,NA,NA),
#    #                                               alpha=1)))+
#       theme(legend.position = "bottom")+
#       theme_classic())

# list1111 <- sessionInfo()
# list1111$loadedOnly

# install.packages("ggpmisc")
# library(ggpmisc)

# summary(lm(plan_wide_19902020$CODIGO~plan_wide_19902020$DBO))
# 
# 
# p <- ggplot(data, aes(y=number, x=pod)) +
#   geom_boxplot()
# print(p)

# install.packages("GGally")


# fit = lm(plan_wide_19902020$`Oxigênio dissolvido`~ plan_wide_19902020$CODIGO)
# summary(fit)
# summary.lm(fit)
# 
# plan_wide_19902020$IQA
# 
# plan_wide_19902020 <- plan_wide_19902020 %>% 
#    mutate(IQA = ifelse(IQA == 0, NA, IQA))
# pacman::p_load(esquisse)
```

### Correlação

```{r Correlação, time_it = TRUE}
parametros_IQA <- plan_wide_19902020 %>%
  select(CODIGO,
         pH,
         DBO,
         `Nitrogênio amoniacal`,
         `Nitrogênio total`,
         `Fósforo total`,
         `Temperatura água`,
         Turbidez,
         `Sólidos totais`,
         `Oxigênio dissolvido`,
         Condutividade)

write.csv(parametros_IQA,
          "./parametros_IQA.csv",
          row.names = FALSE)

parametros_IQA %>% 
  select(-CODIGO) %>% 
  ggcorr(method = "complete.obs",
           # "pearson",
           # "pairwise",
         name = "Correlação",
         label = TRUE,
         label_alpha = TRUE,
         digits = 3,
         low = "#3B9AB2",
         mid = "#EEEEEE",
         high = "#F21A00",
         # palette = "RdYlBu",
         layout.exp = 0,
         legend.position = "left",
         label_round = 3,
         )

# Gráfico das correlações entre todos os parâmetros com significância
# correl_IQA <- parametros_IQA %>% 
#   select(-CODIGO) %>% 
#   ggpairs(title = "Correlação entre parâmetros que compõem o IQA",
#           axisLabels = "show")
```

### Condutividade elétrica

```{r Gráfico cond_elet periodo1, warning = FALSE, message = FALSE}
(cond_elet_p1 <- ggplot(plan_wide_19902020 %>% 
                          filter(ANO_COLETA>"1990" &
                                   ANO_COLETA<="2000"),
                        aes(CODIGO,
                            Condutividade))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=500,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=500,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Condutividade elétrica no período 1990-2000",
        x="Estação",
        y="µmhos/cm")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$Condutividade, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico cond_elet periodo2, warning = FALSE, message = FALSE}
(cond_elet_p2 <- ggplot(plan_wide_19902020 %>% 
                          filter(ANO_COLETA>"2000" &
                                   ANO_COLETA<="2010"),
                        aes(CODIGO,
                            Condutividade))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=500,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=500,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Condutividade elétrica no período 2000-2010",
        x="Estação",
        y="µmhos/cm")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$Condutividade, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico cond_elet periodo3, warning = FALSE, message = FALSE}
(cond_elet_p3 <- ggplot(plan_wide_19902020 %>% 
                          filter(ANO_COLETA>"2010" &
                                   ANO_COLETA<="2020"),
                        aes(CODIGO,
                            Condutividade))+
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=500,
            ymax=Inf,
            alpha=1,
            fill="#eb5661")+ #classe 4
   annotate("rect",
            xmin=-Inf,
            xmax=Inf,
            ymin=-Inf,
            ymax=500,
            alpha=1,
            fill="#8dcdeb")+ #classe 1
   stat_boxplot(geom = 'errorbar',
                width=0.3,
                position = position_dodge(width = 0.65))+
   geom_boxplot(fill='#F8F8FF',
                color="black",
                outlier.shape = NA, #se deixar NA fica só o jitter, se não, deixa 1
                width= 0.7)+
   labs(title = "Condutividade elétrica no período 2010-2020",
        x="Estação",
        y="µmhos/cm")+
   scale_y_continuous(expand = expansion(mult = c(0.01, 0.05)),
                      n.breaks = 8,
                      limits = c(0,
                                 max(plan_wide_19902020$Condutividade, na.rm = TRUE)),
                      labels = scales::number_format(accuracy = 1,
                                                     decimal.mark = ",",
                                                     big.mark = " "))+
    ggbeeswarm::geom_quasirandom(
     size = 1.2,
     alpha = .25,
     width = .07,
   )+
   scale_x_discrete(limits = c("87398500", 
                               "87398980", 
                               "87398900", 
                               "87398950", 
                               "87405500", 
                               "87406900", 
                               "87409900"),
                    labels = c("PM1", "PM2", "PM3", "PM4", "PM5", "PM6", "PM7")
   )+
   geom_smooth(method = "lm",
               se=FALSE, #se deixar TRUE gera o intervalo de confiança de 95%
               aes(group=1),
               alpha=.5,
               na.rm = TRUE,
               size = 1)+
   theme_grafs()
)
```

```{r Gráfico cond_elet 3 periodos juntos, warning=FALSE, message=FALSE}
grid.arrange(cond_elet_p1, cond_elet_p2, cond_elet_p3, ncol = 3)
```

```{r Sumário cond_elet}
(sum_cond_elet_p1 <- plan_wide_19902020 %>%
   select(CODIGO, Condutividade, ANO_COLETA) %>% 
   filter(ANO_COLETA>"1990" &
            ANO_COLETA<="2000") %>% 
   group_by(CODIGO) %>% 
   summarize(
     min = 
       min(Condutividade, 
           na.rm = TRUE),
     q1 = 
       quantile(Condutividade, 0.25, 
                na.rm = TRUE),
     median = 
       median(Condutividade, 
              na.rm = TRUE),
     mean = 
       mean(Condutividade, 
            na.rm= TRUE),
     q3 = 
       quantile(Condutividade, 0.75, 
                na.rm = TRUE),
     max = 
       max(Condutividade, 
           na.rm = TRUE))
)

(sum_cond_elet_p2 <- plan_wide_19902020 %>%
    select(CODIGO, Condutividade, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2000" &
             ANO_COLETA<="2010") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(Condutividade, 
            na.rm = TRUE),
      q1 = 
        quantile(Condutividade, 0.25, 
                 na.rm = TRUE),
      median = 
        median(Condutividade, 
               na.rm = TRUE),
      mean = 
        mean(Condutividade, 
             na.rm= TRUE),
      q3 = 
        quantile(Condutividade, 0.75, 
                 na.rm = TRUE),
      max = 
        max(Condutividade, 
            na.rm = TRUE))
)

(sum_cond_elet_p3 <- plan_wide_19902020 %>%
    select(CODIGO, Condutividade, ANO_COLETA) %>% 
    filter(ANO_COLETA>"2010" &
             ANO_COLETA<="2020") %>% 
    group_by(CODIGO) %>% 
    summarize(
      min = 
        min(Condutividade, 
            na.rm = TRUE),
      q1 = 
        quantile(Condutividade, 0.25, 
                 na.rm = TRUE),
      median = 
        median(Condutividade, 
               na.rm = TRUE),
      mean = 
        mean(Condutividade, 
             na.rm= TRUE),
      q3 = 
        quantile(Condutividade, 0.75, 
                 na.rm = TRUE),
      max = 
        max(Condutividade, 
            na.rm = TRUE),
      n = 
        length(Condutividade))
)

# plan_wide_19902020 %>% 
#    select(CODIGO, IQA) %>% 
#    group_by(CODIGO) %>% 
#    summarize(
#       min = 
#          min(IQA, 
#              na.rm = TRUE),
#       q1 = 
#          quantile(IQA, 0.25, 
#                   na.rm = TRUE),
#       median = 
#          median(IQA, 
#                 na.rm = TRUE),
#       mean = 
#          mean(IQA, 
#               na.rm= TRUE),
#       q3 = 
#          quantile(IQA, 0.75, 
#                   na.rm = TRUE),
#       max = 
#          max(IQA, 
#              na.rm = TRUE))
```

```{r Salvando cond_elet}
ggsave("cond_elet_p1.png",
       plot = cond_elet_p1,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("cond_elet_p2.png",
       plot = cond_elet_p2,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("cond_elet_p3.png",
       plot = cond_elet_p3,
       path = "./graficos",
       dpi = 300,
       type = "cairo")

ggsave("cond_elet_3periodos.png",
       units = c("px"),
       width = 4500,
       height = 2993,
       plot = grid.arrange(cond_elet_p1, cond_elet_p2, cond_elet_p3, ncol = 3),
       path = "./graficos",
       dpi = 300,
       type = "cairo")

```
